
[CI] Add two pre-commit hooks to forbid and remove tabs #5323

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
name: Python build

on:
  push:
    branches:
      - master
    paths:
      - 'common/**'
      - 'spark/**'
      - 'spark-shaded/**'
      - 'pom.xml'
      - 'python/**'
      - '.github/workflows/python.yml'
  pull_request:
    branches:
      - '*'
    paths:
      - 'common/**'
      - 'spark/**'
      - 'spark-shaded/**'
      - 'pom.xml'
      - 'python/**'
      - '.github/workflows/python.yml'

env:
  MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
  JAI_CORE_VERSION: '1.1.3'
  JAI_CODEC_VERSION: '1.1.3'
  JAI_IMAGEIO_VERSION: '1.1'
  DO_NOT_TRACK: true
  SPARK_LOCAL_IP: 127.0.0.1
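
# DO_NOT_TRACK opts out any tooling that honors that convention; SPARK_LOCAL_IP
# pins Spark to loopback so tests don't depend on the runner's hostname
# resolving. The JAI_* versions feed the dependency download step further down.
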
permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
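
# For a pull-request run, the concurrency group above expands to something
# like "Python build-refs/pull/5323/merge", so a newer push cancels the
# in-flight run; pushes to master are exempt and queue instead.
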
jobs:
  build:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        include:
          - spark: '4.1.1'
            scala: '2.13.8'
            java: '17'
            python: '3.11'
          - spark: '4.0.0'
            scala: '2.13.8'
            java: '17'
            python: '3.10'
          - spark: '3.5.0'
            scala: '2.12.8'
            java: '11'
            python: '3.9'
          - spark: '3.4.0'
            scala: '2.12.8'
            java: '11'
            python: '3.8'
            shapely: '1'
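    # Each matrix leg pins a mutually compatible Spark/Scala/Java/Python set;
    # only the Spark 3.4 leg sets shapely: '1', presumably to keep one leg
    # exercising the legacy Shapely 1.x API (consumed by the setup step below).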
    steps:
      - uses: actions/checkout@v6
        with:
          persist-credentials: false
      - uses: actions/setup-java@v5
        with:
          distribution: 'zulu'
          java-version: '${{ matrix.java }}'
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python }}
      - name: Install uv
        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
      - name: Cache Maven packages
        uses: actions/cache@v5
        with:
          path: ~/.m2
          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
          restore-keys: ${{ runner.os }}-m2
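      # The restore-keys prefix above lets a run reuse the newest previous
      # ~/.m2 cache when the pom.xml hash has no exact match.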
      - name: Build Sedona with Maven
        env:
          SPARK_VERSION: ${{ matrix.spark }}
          SCALA_VERSION: ${{ matrix.scala }}
        run: |
          SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
          mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dgeotools
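      # Bash substring expansion derives the profile values above: e.g.
      # SPARK_VERSION=3.5.0 gives ${SPARK_VERSION:0:3} = "3.5" and
      # SCALA_VERSION=2.12.8 gives ${SCALA_VERSION:0:4} = "2.12".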
      - name: Setup Python build environment
        env:
          SPARK_VERSION: ${{ matrix.spark }}
          SHAPELY_VERSION: ${{ matrix.shapely }}
        run: |
          cd python
          # Pin legacy Shapely 1.x on the matrix leg that requests it
          if [ "${SHAPELY_VERSION}" == "1" ]; then
            uv add "shapely<2.0.0"
          fi
          if [ "${SPARK_VERSION:0:1}" == "4" ]; then
            # Spark 4.x requires Python 3.10+; drop apache-flink since it conflicts with pyspark 4.x
            uv remove apache-flink --optional flink
            uv add "pyspark==${SPARK_VERSION}; python_version >= '3.10'"
          else
            # Install the exact pyspark version from the matrix
            uv add pyspark==${SPARK_VERSION}
          fi
          uv sync
          uv run python -c "import pyspark; print('Using pyspark', pyspark.__version__)"
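      # The "; python_version >= '3.10'" suffix above is a standard PEP 508
      # environment marker, so the pyspark 4.x pin only applies on Python
      # 3.10+ interpreters and resolution still succeeds elsewhere.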
      - name: Install sedona package
        run: cd python; uv pip install -e .
      - name: Prepare Sedona Spark Dependencies
        run: |
          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar
          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar
          wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar
          PY_SITE=$(cd python; uv run python -c "import site; print(site.getsitepackages()[0])")
          echo "Python site-packages: $PY_SITE"
          mv -v jai_core-${JAI_CORE_VERSION}.jar ${PY_SITE}/pyspark/jars
          mv -v jai_codec-${JAI_CODEC_VERSION}.jar ${PY_SITE}/pyspark/jars
          mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar ${PY_SITE}/pyspark/jars
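      # The JAI (Java Advanced Imaging) jars are legacy artifacts that are not
      # available on Maven Central, hence the repo.osgeo.org download; moving
      # them into pyspark's jars/ puts them on the pip-installed Spark classpath.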
      - name: Copy Sedona Spark JARs
        run: |
          PY_SITE=$(cd python; uv run python -c "import site; print(site.getsitepackages()[0])")
          find spark-shaded/target -name 'sedona-*.jar' -exec cp {} "${PY_SITE}/pyspark/jars/" \;
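      # Assumption: the spark-shaded module builds a single fat jar bundling
      # Sedona and its relocated dependencies, so this one copy is sufficient.
      # (The glob is quoted so find, not the shell, expands it.)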
      - name: Run tests
        run: |
          cd python
          export SPARK_HOME=$(uv run python -c "import site; print(site.getsitepackages()[0]+'/pyspark')")
          uv run pytest -v tests
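      # SPARK_HOME points at the pip-installed pyspark tree, so the JARs staged
      # above are on the classpath when tests start a local SparkSession.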
      - name: Run basic tests without rasterio
        run: |
          cd python
          export SPARK_HOME=$(uv run python -c "import site; print(site.getsitepackages()[0]+'/pyspark')")
          uv remove rasterio --optional all
          uv remove rasterio --dev
          uv sync
          uv run pytest -v tests/core/test_rdd.py tests/sql/test_dataframe_api.py
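      # Removing rasterio from both the optional and dev groups and re-running
      # a test subset checks that the raster extra is genuinely optional.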
      - name: Run Spark Connect tests
        if: ${{ matrix.spark >= '3.4.0' }}
        run: |
          cd python
          export SPARK_REMOTE=local
          export SPARK_HOME=$(uv run python -c "import site; print(site.getsitepackages()[0]+'/pyspark')")
          uv pip install "pyspark[connect]==${{ matrix.spark }}" --reinstall
          uv run pytest -v tests/sql/test_dataframe_api.py
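      # With SPARK_REMOTE=local, pyspark starts an in-process Spark Connect
      # server (available since Spark 3.4); the pyspark[connect] extra supplies
      # the gRPC client dependencies these tests need.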