diff --git a/ci/docker/conda-python-emscripten.dockerfile b/ci/docker/conda-python-emscripten.dockerfile index 878f918710f2..c56bf4f0c593 100644 --- a/ci/docker/conda-python-emscripten.dockerfile +++ b/ci/docker/conda-python-emscripten.dockerfile @@ -20,7 +20,7 @@ ARG arch ARG python="3.12" FROM ${repo}:${arch}-conda-python-${python} -ARG selenium_version="4.15.2" +ARG selenium_version="4.41.0" ARG pyodide_version="0.26.0" ARG chrome_version="latest" ARG required_python_min="(3,12)" diff --git a/ci/scripts/install_chromedriver.sh b/ci/scripts/install_chromedriver.sh index 9167ae70e8d1..defc1a9e50b5 100755 --- a/ci/scripts/install_chromedriver.sh +++ b/ci/scripts/install_chromedriver.sh @@ -23,15 +23,22 @@ set -e chrome_version=$1 -if [ "$chrome_version" = "latest" ]; then - latest_release_path=LATEST_RELEASE_STABLE -else - latest_release_path=LATEST_RELEASE_${chrome_version} +# Look up the Chrome version from the apt repo's Packages file. +CHROME_DEB_VERSION=$(wget --no-verbose -O - \ + "https://dl.google.com/linux/chrome/deb/dists/stable/main/binary-amd64/Packages.gz" \ + | gunzip \ + | awk '/^Package: google-chrome-stable$/{found=1} found && /^Version: /{print $2; exit}') +CHROME_VERSION_FULL=${CHROME_DEB_VERSION%-*} + +# Validate that there haven't been any major version bumps since the last time we updated this script. 
+if [ "$chrome_version" != "latest" ] && [ "${CHROME_VERSION_FULL%%.*}" != "$chrome_version" ]; then + echo "Requested Chrome major ${chrome_version}, but apt repo currently publishes ${CHROME_VERSION_FULL}" >&2 + exit 1 fi -CHROME_VERSION_FULL=$(wget -q --no-verbose -O - "https://googlechromelabs.github.io/chrome-for-testing/${latest_release_path}") -CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION_FULL}-1_amd64.deb" + +CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_DEB_VERSION}_amd64.deb" CHROMEDRIVER_DOWNLOAD_URL="https://storage.googleapis.com/chrome-for-testing-public/${CHROME_VERSION_FULL}/linux64/chromedriver-linux64.zip" -wget -q --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}" +wget --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}" apt-get update apt install -qqy /tmp/google-chrome.deb rm -f /tmp/google-chrome.deb diff --git a/compose.yaml b/compose.yaml index be32a95dd945..f527a835a3ba 100644 --- a/compose.yaml +++ b/compose.yaml @@ -951,8 +951,8 @@ services: clang_tools: ${CLANG_TOOLS} llvm: ${LLVM} pyodide_version: "0.26.0" - chrome_version: "134" - selenium_version: "4.15.2" + chrome_version: "148" + selenium_version: "4.41.0" required_python_min: "(3,12)" python: ${PYTHON} shm_size: *shm-size diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 2f5bbf55a4cb..9787fabafae1 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2059,9 +2059,7 @@ function(build_protobuf) # Make protobuf_fc depend on the install completion marker add_custom_target(protobuf_fc DEPENDS "${PROTOBUF_PREFIX}/.protobuf_installed") - set(ARROW_BUNDLED_STATIC_LIBS - ${ARROW_BUNDLED_STATIC_LIBS} protobuf::libprotobuf - PARENT_SCOPE) + list(APPEND ARROW_BUNDLED_STATIC_LIBS 
protobuf::libprotobuf) if(CMAKE_CROSSCOMPILING) # If we are cross compiling, we need to build protoc for the host @@ -2069,12 +2067,21 @@ function(build_protobuf) set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") + # Cross-compiled builds (PyArrow on emscripten) need utf8_range bundled explicitly. + list(APPEND ARROW_BUNDLED_STATIC_LIBS utf8_range) + set(PROTOBUF_HOST_CMAKE_ARGS "-DCMAKE_CXX_FLAGS=" "-DCMAKE_C_FLAGS=" "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}" -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_DEBUG_POSTFIX=) + if(ABSL_VENDORED) + # Force protobuf to reuse Arrow's already-extracted absl source + # so we don't re-download it and don't run into issues with multiple copies of abseil. + list(APPEND PROTOBUF_HOST_CMAKE_ARGS -Dprotobuf_FORCE_FETCH_DEPENDENCIES=ON + "-DFETCHCONTENT_SOURCE_DIR_ABSL=${absl_SOURCE_DIR}") + endif() # We reuse the FetchContent downloaded source but build it with host compiler externalproject_add(protobuf_ep_host @@ -2089,7 +2096,102 @@ function(build_protobuf) PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}") add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host) + # For cross-compilation along with ExternalProject we need to + # manually add absl deps to the bundled static libs so that + # they are available for the generated code in protobuf v31. 
+ list(APPEND + ARROW_BUNDLED_STATIC_LIBS + absl::bad_any_cast_impl + absl::bad_optional_access + absl::bad_variant_access + absl::base + absl::city + absl::civil_time + absl::cord + absl::cord_internal + absl::cordz_functions + absl::cordz_handle + absl::cordz_info + absl::cordz_sample_token + absl::crc32c + absl::crc_cord_state + absl::crc_cpu_detect + absl::crc_internal + absl::debugging_internal + absl::decode_rust_punycode + absl::demangle_internal + absl::demangle_rust + absl::die_if_null + absl::examine_stack + absl::exponential_biased + absl::failure_signal_handler + absl::flags_commandlineflag + absl::flags_commandlineflag_internal + absl::flags_config + absl::flags_internal + absl::flags_marshalling + absl::flags_parse + absl::flags_private_handle_accessor + absl::flags_program_name + absl::flags_reflection + absl::flags_usage + absl::flags_usage_internal + absl::graphcycles_internal + absl::hash + absl::hashtablez_sampler + absl::int128 + absl::kernel_timeout_internal + absl::leak_check + absl::log_globals + absl::log_initialize + absl::log_internal_check_op + absl::log_internal_conditions + absl::log_internal_fnmatch + absl::log_internal_format + absl::log_internal_globals + absl::log_internal_log_sink_set + absl::log_internal_message + absl::log_internal_nullguard + absl::log_internal_proto + absl::log_severity + absl::log_sink + absl::low_level_hash + absl::malloc_internal + absl::periodic_sampler + absl::poison + absl::random_distributions + absl::random_internal_distribution_test_util + absl::random_internal_platform + absl::random_internal_pool_urbg + absl::random_internal_randen + absl::random_internal_randen_hwaes + absl::random_internal_randen_hwaes_impl + absl::random_internal_randen_slow + absl::random_internal_seed_material + absl::random_seed_gen_exception + absl::random_seed_sequences + absl::raw_hash_set + absl::raw_logging_internal + absl::scoped_set_env + absl::spinlock_wait + absl::stacktrace + absl::status + absl::statusor + 
absl::str_format_internal + absl::strerror + absl::strings + absl::strings_internal + absl::symbolize + absl::synchronization + absl::throw_delegate + absl::time + absl::time_zone + absl::utf8_for_code_point + absl::vlog_config_internal) endif() + set(ARROW_BUNDLED_STATIC_LIBS + "${ARROW_BUNDLED_STATIC_LIBS}" + PARENT_SCOPE) list(POP_BACK CMAKE_MESSAGE_INDENT) endfunction() diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index d00c0c4b3eb9..3afe3281cbc3 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -5907,6 +5907,7 @@ def test_make_write_options_error(): pformat.make_write_options(43) +@pytest.mark.substrait def test_scanner_from_substrait(dataset): try: import pyarrow.substrait as ps diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py index a4f9ce9d9a28..3cd7d5fd67b3 100644 --- a/python/scripts/run_emscripten_tests.py +++ b/python/scripts/run_emscripten_tests.py @@ -35,7 +35,9 @@ class TemplateOverrider(http.server.SimpleHTTPRequestHandler): def log_request(self, code="-", size="-"): - # don't log successful requests + # don't log successful requests but log errors + if isinstance(code, int) and code >= 400: + sys.stderr.write(f"HTTP {code} for {self.path}\n") return def do_GET(self) -> bytes | None: @@ -200,7 +202,8 @@ class BrowserDriver: def __init__(self, hostname, port, driver): self.driver = driver self.driver.get(f"http://{hostname}:{port}/test.html") - self.driver.set_script_timeout(100) + # Chrome on CI takes longer than locally to compile. 
+ self.driver.set_script_timeout(1200) def load_pyodide(self, dist_dir): pass @@ -259,7 +262,9 @@ def __init__(self, hostname, port): options = Options() options.add_argument("--headless") options.add_argument("--no-sandbox") - super().__init__(hostname, port, webdriver.Chrome(options=options)) + driver = webdriver.Chrome(options=options) + driver.command_executor._client_config.timeout = 1200 + super().__init__(hostname, port, driver) class FirefoxDriver(BrowserDriver): @@ -336,7 +341,9 @@ def _load_pyarrow_in_runner(driver, wheel_name): """ import pyarrow,pathlib pyarrow_dir = pathlib.Path(pyarrow.__file__).parent -pytest.main([pyarrow_dir, '-r', 's']) +# Substrait expression serialization crashes pyodide with a +# "Cannot convert a BigInt value to a number" error. +pytest.main([pyarrow_dir, '-r', 's', '-m', 'not substrait']) """, wait_for_terminate=False, )