summaryrefslogtreecommitdiffstats
path: root/contrib/libs/apache/arrow/patches/pr40418-support-numpy-2.patch
blob: c89732131f5ebb9861d2d912f576c09eabdfbe76 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
--- contrib/libs/apache/arrow/cpp/src/arrow/python/arrow_to_pandas.cc	(index)
+++ contrib/libs/apache/arrow/cpp/src/arrow/python/arrow_to_pandas.cc	(working tree)
@@ -234,7 +234,8 @@ Status SetBufferBase(PyArrayObject* arr, const std::shared_ptr<Buffer>& buffer) 
 }
 
 inline void set_numpy_metadata(int type, const DataType* datatype, PyArray_Descr* out) {
-  auto metadata = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(out->c_metadata);
+  auto metadata =
+      reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(out));
   if (type == NPY_DATETIME) {
     if (datatype->id() == Type::TIMESTAMP) {
       const auto& timestamp_type = checked_cast<const TimestampType&>(*datatype);
@@ -255,7 +256,7 @@ Status PyArray_NewFromPool(int nd, npy_intp* dims, PyArray_Descr* descr, MemoryP
   //
   // * Track allocations
   // * Get better performance through custom allocators
-  int64_t total_size = descr->elsize;
+  int64_t total_size = PyDataType_ELSIZE(descr);
   for (int i = 0; i < nd; ++i) {
     total_size *= dims[i];
   }
@@ -511,8 +512,9 @@ class PandasWriter {
 
   void SetDatetimeUnit(NPY_DATETIMEUNIT unit) {
     PyAcquireGIL lock;
-    auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(
-        PyArray_DESCR(reinterpret_cast<PyArrayObject*>(block_arr_.obj()))->c_metadata);
+    auto date_dtype =
+        reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(
+            PyArray_DESCR(reinterpret_cast<PyArrayObject*>(block_arr_.obj()))));
     date_dtype->meta.base = unit;
   }
 
--- contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_convert.cc	(index)
+++ contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_convert.cc	(working tree)
@@ -46,7 +46,7 @@ NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) {
     PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(ao);
     auto ptr = reinterpret_cast<uint8_t*>(PyArray_DATA(ndarray));
     data_ = const_cast<const uint8_t*>(ptr);
-    size_ = PyArray_SIZE(ndarray) * PyArray_DESCR(ndarray)->elsize;
+    size_ = PyArray_NBYTES(ndarray);
     capacity_ = size_;
     is_mutable_ = !!(PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE);
   }
@@ -148,7 +148,7 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
     TO_ARROW_TYPE_CASE(UNICODE, utf8);
     case NPY_DATETIME: {
       auto date_dtype =
-          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
+          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(descr));
       switch (date_dtype->meta.base) {
         case NPY_FR_s:
           *out = timestamp(TimeUnit::SECOND);
@@ -173,7 +173,7 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
     } break;
     case NPY_TIMEDELTA: {
       auto timedelta_dtype =
-          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
+          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(descr));
       switch (timedelta_dtype->meta.base) {
         case NPY_FR_s:
           *out = duration(TimeUnit::SECOND);
--- contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_interop.h	(index)
+++ contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_interop.h	(working tree)
@@ -67,6 +67,13 @@
 #define NPY_INT32_IS_INT 0
 #endif
 
+// Backported NumPy 2 API (can be removed if numpy 2 is required)
+#if NPY_ABI_VERSION < 0x02000000
+#define PyDataType_ELSIZE(descr) ((descr)->elsize)
+#define PyDataType_C_METADATA(descr) ((descr)->c_metadata)
+#define PyDataType_FIELDS(descr) ((descr)->fields)
+#endif
+
 namespace arrow {
 namespace py {
 
--- contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_to_arrow.cc	(index)
+++ contrib/libs/apache/arrow/cpp/src/arrow/python/numpy_to_arrow.cc	(working tree)
@@ -193,7 +193,7 @@ class NumPyConverter {
       mask_ = reinterpret_cast<PyArrayObject*>(mo);
     }
     length_ = static_cast<int64_t>(PyArray_SIZE(arr_));
-    itemsize_ = static_cast<int>(PyArray_DESCR(arr_)->elsize);
+    itemsize_ = static_cast<int64_t>(PyArray_ITEMSIZE(arr_));
     stride_ = static_cast<int64_t>(PyArray_STRIDES(arr_)[0]);
   }
 
@@ -470,7 +470,8 @@ inline Status NumPyConverter::ConvertData<Date32Type>(std::shared_ptr<Buffer>* d
 
   RETURN_NOT_OK(PrepareInputData<Date32Type>(data));
 
-  auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(dtype_->c_metadata);
+  auto date_dtype =
+      reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(dtype_));
   if (dtype_->type_num == NPY_DATETIME) {
     // If we have inbound datetime64[D] data, this needs to be downcasted
     // separately here from int64_t to int32_t, because this data is not
@@ -506,7 +507,8 @@ inline Status NumPyConverter::ConvertData<Date64Type>(std::shared_ptr<Buffer>* d
 
   RETURN_NOT_OK(PrepareInputData<Date64Type>(data));
 
-  auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(dtype_->c_metadata);
+  auto date_dtype =
+      reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(PyDataType_C_METADATA(dtype_));
   if (dtype_->type_num == NPY_DATETIME) {
     // If we have inbound datetime64[D] data, this needs to be downcasted
     // separately here from int64_t to int32_t, because this data is not
@@ -736,12 +738,13 @@ Status NumPyConverter::Visit(const StructType& type) {
     PyAcquireGIL gil_lock;
 
     // Create converters for each struct type field
-    if (dtype_->fields == NULL || !PyDict_Check(dtype_->fields)) {
+    if (PyDataType_FIELDS(dtype_) == NULL || !PyDict_Check(PyDataType_FIELDS(dtype_))) {
       return Status::TypeError("Expected struct array");
     }
 
     for (auto field : type.fields()) {
-      PyObject* tup = PyDict_GetItemString(dtype_->fields, field->name().c_str());
+      PyObject* tup =
+          PyDict_GetItemString(PyDataType_FIELDS(dtype_), field->name().c_str());
       if (tup == NULL) {
         return Status::Invalid("Missing field '", field->name(), "' in struct array");
       }