# Contributor: Duncan Bellamy
# Maintainer: Duncan Bellamy
# based on arch linux PKGBUILD
pkgname=apache-arrow
pkgver=11.0.0
pkgrel=6
pkgdesc="multi-language toolbox for accelerated data interchange and in-memory processing"
url="https://arrow.apache.org/"
arch="all"
license="Apache-2.0"
# Build-time requirements of the Python bindings; appended to makedepends.
_py3depends="
	cython
	py3-numpy-dev
	py3-setuptools
	py3-setuptools_scm
	py3-wheel
	python3-dev
	"
makedepends="
	apache-orc-dev
	boost-dev
	brotli-dev
	bzip2-dev
	c-ares-dev
	cmake
	glog-dev
	grpc-dev
	gtest-dev
	lz4-dev
	openssl-dev>3
	protobuf-dev
	rapidjson-dev
	re2-dev
	samurai
	snappy-dev
	thrift-dev
	utf8proc-dev
	zlib-dev
	zstd-dev
	$_py3depends
	"
# Test-time requirements of the Python bindings; appended to checkdepends.
_py3checkdepends="
	py3-cffi
	py3-hypothesis
	py3-pandas
	py3-pytest
	py3-pytest-lazy-fixture
	py3-pytest-xdist
	"
checkdepends="bash grep gzip perl python3 tzdata $_py3checkdepends"
subpackages="$pkgname-dev $pkgname-tools $pkgname-doc py3-pyarrow:python_arrow"
# Commits of the test-data repositories fetched alongside the release tarball.
_arrowsha="d2c73bf78246331d8e58b6f11aa8aa199cbb5929"
_parquetsha="5b82793ef7196f7b3583e85669ced211cd8b5ff2"
source="https://downloads.apache.org/arrow/arrow-$pkgver/apache-arrow-$pkgver.tar.gz
	$pkgname-arrow-testing-$_arrowsha.tar.gz::https://github.com/apache/arrow-testing/archive/$_arrowsha.tar.gz
	$pkgname-parquet-testing-$_parquetsha.tar.gz::https://github.com/apache/parquet-testing/archive/$_parquetsha.tar.gz
	"
options="!check" # fail with py3.11

# Runtime SIMD dispatch level handed to CMake; xsimd-dev is only added as a
# build dependency on the architectures that use "MAX".
case "$CARCH" in
aarch64|x86|x86_64)
	_SIMD="MAX"
	makedepends="$makedepends xsimd-dev"
	;;
*)
	_SIMD="NONE"
	;;
esac

# Configure and build the C++ libraries, stage them under
# $builddir/dist-cpp, then build the Python extension against that staging
# tree.
build() {
	# Extra CMake options for cross builds. Initialised here so a value
	# inherited from the environment can never leak into a native build.
	local crossopts=""
	# Only set for arm*; every other architecture passes an empty value.
	# NOTE(review): confirm that an empty -DARROW_CPU_FLAG= is intended on
	# non-ARM architectures.
	local arrowcpu=""
	local build_tests="OFF"

	if [ "$CBUILD" != "$CHOST" ]; then
		crossopts="-DCMAKE_SYSTEM_NAME=Linux -DCMAKE_HOST_SYSTEM_NAME=Linux"
	fi
	case "$CARCH" in
	arm*)
		arrowcpu="armv7"
		;;
	esac
	# The C++ test suite is only built when abuild is going to run check().
	if want_check; then
		build_tests="ON"
	fi

	# ARROW_ORC is passed exactly once (ON); apache-orc-dev is listed in
	# makedepends.
	# $crossopts is left unquoted on purpose: it holds several options that
	# must be split into separate arguments.
	# shellcheck disable=SC2086
	cmake -B build-cpp -G Ninja \
		-DCMAKE_INSTALL_PREFIX=/usr \
		-DCMAKE_INSTALL_LIBDIR=lib \
		-DBUILD_SHARED_LIBS=True \
		-DCMAKE_BUILD_TYPE=MinSizeRel \
		-DARROW_BUILD_STATIC=OFF \
		-DARROW_DEPENDENCY_SOURCE=SYSTEM \
		-DARROW_SIMD_LEVEL="NONE" \
		-DARROW_RUNTIME_SIMD_LEVEL="$_SIMD" \
		-DARROW_BUILD_EXAMPLES=OFF \
		-DARROW_BUILD_TESTS="$build_tests" \
		-DARROW_COMPUTE=ON \
		-DARROW_FLIGHT=ON \
		-DARROW_GANDIVA=OFF \
		-DARROW_HDFS=ON \
		-DARROW_IPC=ON \
		-DARROW_JEMALLOC=OFF \
		-DARROW_PARQUET=ON \
		-DARROW_PLASMA=ON \
		-DARROW_PYTHON=ON \
		-DARROW_TENSORFLOW=ON \
		-DARROW_USE_GLOG=ON \
		-DARROW_ORC=ON \
		-DARROW_WITH_BROTLI=ON \
		-DARROW_WITH_BZ2=ON \
		-DARROW_WITH_LZ4=ON \
		-DARROW_WITH_SNAPPY=ON \
		-DARROW_WITH_ZLIB=ON \
		-DARROW_WITH_ZSTD=ON \
		-DARROW_CPU_FLAG="$arrowcpu" \
		-DPARQUET_REQUIRE_ENCRYPTION=ON \
		-S cpp \
		$crossopts
	cmake --build build-cpp

	# install in Arrow_DIR for python build to find
	DESTDIR="$builddir/dist-cpp" cmake --install build-cpp

	cd python
	export SETUPTOOLS_SCM_PRETEND_VERSION=$pkgver
	export Arrow_DIR="$builddir/dist-cpp/usr"
	export ArrowFlight_DIR="$Arrow_DIR"
	export Parquet_DIR="$Arrow_DIR"
	python3 setup.py build_ext --inplace \
		--extra-cmake-args="-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=$_SIMD -DPYARROW_BUILD_DATASET=ON -DARROW_CPU_FLAG=$arrowcpu" \
		--cmake-generator Ninja

	# TODO: why does it not work without this?
	python3 setup.py install --root="$builddir/throw"
}

# Run the C++ tests with ctest, then install pyarrow into a scratch root and
# run its pytest suite against the staged C++ libraries. Currently disabled
# through options="!check".
check() {
	# Regex of C++ tests excluded as broken, assembled piecewise to keep the
	# lines short.
	local broken_ctests="arrow-buffer-test|arrow-misc-test"
	broken_ctests="$broken_ctests|arrow-utility-test|arrow-csv-test"
	broken_ctests="$broken_ctests|arrow-compute-aggregate-test|arrow-flight-test"
	broken_ctests="$broken_ctests|arrow-compute-scalar-test"
	broken_ctests="$broken_ctests|arrow-dataset-file-ipc-test"
	broken_ctests="$broken_ctests|arrow-dataset-scanner-test|plasma-client-tests"

	cd build-cpp
	export PARQUET_TEST_DATA="$srcdir/parquet-testing-$_parquetsha/data"
	export ARROW_TEST_DATA="$srcdir/arrow-testing-$_arrowsha/data"
	# exclude broken tests
	ctest -j4 --output-on-failure -E "$broken_ctests"

	cd ../python
	ARROW_HOME="$builddir/dist-cpp/usr" \
		python3 setup.py install --root="$PWD/dist-python"

	# The unquoted echo expands the python3* glob to the real site-packages
	# directory before it is placed in PYTHONPATH.
	# shellcheck disable=SC2086,SC2116
	PYTHONPATH="$(echo $PWD/dist-python/usr/lib/python3*/site-packages)" \
	LIBRARY_PATH="$builddir/dist-cpp/usr/lib:$LIBRARY_PATH" \
	LD_LIBRARY_PATH="$builddir/dist-cpp/usr/lib:$LD_LIBRARY_PATH" \
	pytest -n 4 pyarrow --deselect=pyarrow/tests/test_memory.py \
		--deselect=pyarrow/tests/test_csv.py \
		--deselect=pyarrow/tests/parquet/test_data_types.py \
		--deselect=pyarrow/tests/test_array.py::test_dictionary_to_numpy \
		--deselect=pyarrow/tests/test_io.py::test_python_file_large_seeks \
		--deselect=pyarrow/tests/test_io.py::test_foreign_buffer \
		--deselect=pyarrow/tests/test_io.py::test_memory_map_large_seeks \
		--deselect=pyarrow/tests/test_pandas.py::TestConvertStructTypes::test_from_numpy_nested \
		--deselect=pyarrow/tests/test_schema.py::test_schema_sizeof \
		--deselect=pyarrow/tests/test_serialization.py::test_primitive_serialization \
		--deselect=pyarrow/tests/test_serialization.py::test_integer_limits \
		--deselect=pyarrow/tests/parquet/test_dataset.py::test_partitioned_dataset
}

# Install the C++ build and the already-built Python module into $pkgdir,
# then remove pyarrow's bundled test directory.
package() {
	DESTDIR="$pkgdir" cmake --install build-cpp

	cd python
	python3 setup.py install --skip-build --root="$pkgdir"
	rm -r "$pkgdir"/usr/lib/python3*/site-packages/pyarrow/tests
}

# Subpackage apache-arrow-tools: receives usr/bin/plasma-store-server.
tools() {
	pkgdesc="$pkgdesc (extra tools)"

	amove usr/bin/plasma-store-server
}

# Subpackage py3-pyarrow: receives the Python library tree and
# usr/bin/plasma_store.
python_arrow() {
	pkgdesc="$pkgdesc (python module)"
	depends="python3 py3-cffi py3-numpy"
	# renamed from this to avoid confusion as in python this is "pyarrow"
	provides="py3-apache-arrow=$pkgver-r$pkgrel"
	replaces="py3-apache-arrow"

	amove usr/lib/python3* usr/bin/plasma_store
}

sha512sums="
46df4fb5a703d38d0a74fde9838e9f9702b24b442cb225517516c335a5ab18955699000bf0b2fc7d1698ada6d2e890ba3860933b6280f5160b0fce8a07484d0e  apache-arrow-11.0.0.tar.gz
0be19960b0d22fc2e07bf84f11148e69d6fa82f10627eb2a5e4b762b1d4cf4e151a57c5a1dc761d6d8ed29881ea589e4cfe0cd47d5c7cbf7b6107143b4a45e82  apache-arrow-arrow-testing-d2c73bf78246331d8e58b6f11aa8aa199cbb5929.tar.gz
b761ccca140ce109fd39320a0e0ab9b9cae07f0f28c0993eada332d6fd5f7a3854230c260467a9fd9f5b9a8941a375280f473f929480dca370ae45e31525b13d  apache-arrow-parquet-testing-5b82793ef7196f7b3583e85669ced211cd8b5ff2.tar.gz
"