1 change: 1 addition & 0 deletions .github/workflows/build_and_test.yml
@@ -709,6 +709,7 @@ jobs:
apt-get update -y
apt-get install -y ruby ruby-dev
Rscript -e "install.packages(c('remotes', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'e1071', 'roxygen2', 'ggplot2', 'mvtnorm', 'statmod'), repos='https://cloud.r-project.org/')"
Rscript -e "remotes::install_version('ragg', version='1.2.5', repos='https://cloud.r-project.org')"
Rscript -e "remotes::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')"
Rscript -e "remotes::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')"
gem install bundler -v 2.4.22
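A note on the new pin (the motivation is inferred): install.packages() installs only the current CRAN release, while remotes::install_version() can also resolve older releases from the CRAN source archive, which keeps this branch's docs toolchain reproducible. A minimal check, in the same Rscript style as above:

```sh
# remotes::install_version() falls back to the CRAN archive for non-current
# releases; install.packages() only ever offers the latest version.
Rscript -e "remotes::install_version('ragg', version='1.2.5', repos='https://cloud.r-project.org')"
Rscript -e "stopifnot(packageVersion('ragg') == '1.2.5')"
```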
7 changes: 6 additions & 1 deletion R/pkg/R/utils.R
@@ -546,6 +546,11 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
error = function(e) { FALSE })) {
obj <- get(nodeChar, envir = func.env, inherits = FALSE)
if (is.function(obj)) {
if (is.primitive(obj)) {
Member Author:

As of Spark 4.0, SparkR is deprecated, so this change is applied only to branch-3.5 in this PR.
Do we need to apply this change for other branches including master?
CRAN seems to provide only the latest version of R, so it is difficult to pin to an older version, and this change is necessary at least for branch-3.5 for CI.

# Primitive functions have no closure to clean.
assign(nodeChar, obj, envir = newEnv)
break
}
# If the node is a function call.
funcList <- mget(nodeChar, envir = checkedFuncs, inherits = F,
ifnotfound = list(list(NULL)))[[1]]
@@ -592,7 +597,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
# return value
# a new version of func that has a correct environment (closure).
cleanClosure <- function(func, checkedFuncs = new.env()) {
if (is.function(func)) {
if (is.function(func) && !is.primitive(func)) {
newEnv <- new.env(parent = .GlobalEnv)
func.body <- body(func)
oldEnv <- environment(func)
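Background for the is.primitive() guards added above: primitive R functions are implemented in C and carry no R-level body or enclosing environment, so there is no closure to capture. A quick demonstration in the Rscript style used elsewhere in this PR:

```sh
# Primitives like sum have no closure: body() and environment() return NULL,
# while an ordinary R function has both -- hence the short-circuit above.
Rscript -e "
f <- function(x) x + 1
stopifnot(!is.primitive(f), is.primitive(sum))
stopifnot(is.null(body(sum)), is.null(environment(sum)))
stopifnot(!is.null(body(f)), !is.null(environment(f)))
"
```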
7 changes: 4 additions & 3 deletions R/run-tests.sh
@@ -58,10 +58,11 @@ if [[ $FAILED != 0 || $NUM_TEST_WARNING != 0 ]]; then
echo -en "\033[0m" # No color
exit -1
else
# We have 2 NOTEs: for RoxygenNote and one in Jenkins only "No repository set"
# We have 3 NOTEs: for RoxygenNote, one in Jenkins only "No repository set",
# and "Lost braces" in Rd files due to R 4.4+ stricter checkRd
# For non-latest version branches, one WARNING for package version
if [[ ($NUM_CRAN_WARNING != 0 || $NUM_CRAN_ERROR != 0 || $NUM_CRAN_NOTES -gt 2) &&
($HAS_PACKAGE_VERSION_WARN != 1 || $NUM_CRAN_WARNING != 1 || $NUM_CRAN_ERROR != 0 || $NUM_CRAN_NOTES -gt 1) ]]; then
if [[ ($NUM_CRAN_WARNING != 0 || $NUM_CRAN_ERROR != 0 || $NUM_CRAN_NOTES -gt 3) &&
($HAS_PACKAGE_VERSION_WARN != 1 || $NUM_CRAN_WARNING != 1 || $NUM_CRAN_ERROR != 0 || $NUM_CRAN_NOTES -gt 2) ]]; then
cat $CRAN_CHECK_LOG_FILE
echo -en "\033[31m" # Red
echo "Had CRAN check errors; see logs."
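The raised NOTE budget corresponds to the "Lost braces" diagnostic from the stricter checkRd. A minimal reproduction sketch (the Rd file is illustrative, and this assumes an R version with the stricter check):

```sh
# Literal braces in Rd running text now produce a "Lost braces" note.
cat > /tmp/demo.Rd <<'EOF'
\name{demo}
\title{Demo}
\description{A pair {name, value} written with literal braces.}
EOF
Rscript -e 'print(tools::checkRd("/tmp/demo.Rd"))'  # expect a "Lost braces" note
```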
27 changes: 13 additions & 14 deletions dev/infra/Dockerfile
@@ -19,43 +19,39 @@
# See also in https://hub.docker.com/_/ubuntu
FROM ubuntu:focal-20221019

ENV FULL_REFRESH_DATE 20221118
ENV FULL_REFRESH_DATE 20260510

ENV DEBIAN_FRONTEND noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN true

ARG APT_INSTALL="apt-get install --no-install-recommends -y"

RUN apt-get clean
RUN apt-get update
RUN $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget openjdk-8-jdk libpython3-dev python3-pip python3-setuptools python3.8 python3.9
RUN apt-get update && $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget openjdk-8-jdk libpython3-dev python3-pip python3-setuptools python3.8 python3.9
RUN update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java

RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
RUN curl -sS https://bootstrap.pypa.io/pip/3.9/get-pip.py | python3.9

RUN add-apt-repository ppa:pypy/ppa
RUN apt update
RUN $APT_INSTALL gfortran libopenblas-dev liblapack-dev
RUN $APT_INSTALL build-essential
RUN apt-get update && $APT_INSTALL gfortran libopenblas-dev liblapack-dev build-essential

RUN mkdir -p /usr/local/pypy/pypy3.8 && \
curl -sqL https://downloads.python.org/pypy/pypy3.8-v7.3.11-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.8 --strip-components=1 && \
ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3.8 && \
ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3

RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
RUN curl -sS https://bootstrap.pypa.io/pip/3.8/get-pip.py | pypy3

RUN $APT_INSTALL gnupg ca-certificates pandoc
RUN apt-get update && $APT_INSTALL gnupg ca-certificates pandoc
RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list
RUN gpg --keyserver hkps://keyserver.ubuntu.com --recv-key E298A3A825C0D65DFD57CBB651716619E084DAB9
RUN gpg -a --export E084DAB9 | apt-key add -
RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/'
RUN apt update
RUN $APT_INSTALL r-base libcurl4-openssl-dev qpdf libssl-dev zlib1g-dev
RUN apt-get update && $APT_INSTALL r-base libcurl4-openssl-dev qpdf libssl-dev zlib1g-dev libuv1-dev
RUN Rscript -e "install.packages(c('remotes', 'knitr', 'markdown', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'roxygen2', 'xml2'), repos='https://cloud.r-project.org/')"

# See more in SPARK-39959, roxygen2 < 7.2.1
RUN apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \
RUN apt-get update && apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \
libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev \
libtiff5-dev libjpeg-dev
RUN Rscript -e "install.packages(c('remotes'), repos='https://cloud.r-project.org/')"
@@ -64,8 +60,11 @@ RUN Rscript -e "remotes::install_version('roxygen2', version='7.2.0', repos='htt
# See more in SPARK-39735
ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"

RUN pypy3 -m pip install numpy 'pandas<=2.0.3' scipy coverage matplotlib
RUN python3.9 -m pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
RUN printf 'beniget==0.4.1\npyproject-metadata==0.8.1\n' > /tmp/pypy-constraints.txt && \
PIP_CONSTRAINT=/tmp/pypy-constraints.txt pypy3 -m pip install numpy scipy coverage matplotlib && \
SETUPTOOLS_USE_DISTUTILS=stdlib pypy3 -m pip install 'pandas<=2.0.3' && \
rm /tmp/pypy-constraints.txt
RUN python3.9 -m pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' 'Flask==1.1.2' 'Werkzeug==2.1.2'

# Add Python deps for Spark Connect.
RUN python3.9 -m pip install 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3' 'googleapis-common-protos==1.56.4'
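Two recurring patterns in the Dockerfile changes above, sketched with illustrative package names rather than as a definitive recipe:

```sh
# 1) Fold `apt-get update` into the same RUN layer as the install. With
#    separate RUN steps, Docker may reuse a stale cached index layer, and the
#    later install then fails on package versions that have moved upstream.
apt-get update && apt-get install --no-install-recommends -y curl

# 2) PIP_CONSTRAINT is read by pip like a -c file: the listed versions are
#    enforced wherever those packages get pulled in, including isolated build
#    environments, without installing them outright.
printf 'beniget==0.4.1\npyproject-metadata==0.8.1\n' > /tmp/constraints.txt
PIP_CONSTRAINT=/tmp/constraints.txt pypy3 -m pip install numpy
```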
6 changes: 6 additions & 0 deletions python/mypy.ini
@@ -169,3 +169,9 @@ ignore_missing_imports = True
; Ignore errors for proto generated code
[mypy-pyspark.sql.connect.proto.*, pyspark.sql.connect.proto]
ignore_errors = True

[mypy-pydantic.*]
follow_imports = skip

[mypy-sqlalchemy.*]
follow_imports = skip
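For the two new overrides above: follow_imports = skip keeps mypy from descending into the matched packages, so names imported from them are typed as Any. A quick way to observe the effect (the file path and expected note are assumptions):

```sh
printf 'from pydantic import BaseModel\nreveal_type(BaseModel)\n' > /tmp/t.py
python3.9 -m mypy --follow-imports=skip --ignore-missing-imports /tmp/t.py
# expected: note: Revealed type is "Any"
```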
8 changes: 4 additions & 4 deletions python/pyspark/ml/tests/typing/test_feature.yml
@@ -47,9 +47,9 @@
out: |
main:14: error: No overload variant of "StringIndexer" matches argument types "str", "List[str]" [call-overload]
main:14: note: Possible overload variants:
main:14: note: def StringIndexer(self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer
main:14: note: def StringIndexer(self, *, inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer
main:14: note: def __init__(self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer
main:14: note: def __init__(self, *, inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer
main:15: error: No overload variant of "StringIndexer" matches argument types "List[str]", "str" [call-overload]
main:15: note: Possible overload variants:
main:15: note: def StringIndexer(self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer
main:15: note: def StringIndexer(self, *, inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer
main:15: note: def __init__(self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer
main:15: note: def __init__(self, *, inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer
16 changes: 8 additions & 8 deletions python/pyspark/sql/tests/typing/test_functions.yml
@@ -70,32 +70,32 @@
main:29: error: No overload variant of "array" matches argument types "List[Column]", "List[Column]" [call-overload]
main:29: note: Possible overload variants:
main:29: note: def array(*cols: Union[Column, str]) -> Column
main:29: note: def [ColumnOrName_] array(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:29: note: def [ColumnOrName_] array(__cols, Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:30: error: No overload variant of "create_map" matches argument types "List[Column]", "List[Column]" [call-overload]
main:30: note: Possible overload variants:
main:30: note: def create_map(*cols: Union[Column, str]) -> Column
main:30: note: def [ColumnOrName_] create_map(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:30: note: def [ColumnOrName_] create_map(__cols, Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:31: error: No overload variant of "map_concat" matches argument types "List[Column]", "List[Column]" [call-overload]
main:31: note: Possible overload variants:
main:31: note: def map_concat(*cols: Union[Column, str]) -> Column
main:31: note: def [ColumnOrName_] map_concat(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:31: note: def [ColumnOrName_] map_concat(__cols, Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:32: error: No overload variant of "struct" matches argument types "List[str]", "List[str]" [call-overload]
main:32: note: Possible overload variants:
main:32: note: def struct(*cols: Union[Column, str]) -> Column
main:32: note: def [ColumnOrName_] struct(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:32: note: def [ColumnOrName_] struct(__cols, Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:33: error: No overload variant of "array" matches argument types "List[str]", "List[str]" [call-overload]
main:33: note: Possible overload variants:
main:33: note: def array(*cols: Union[Column, str]) -> Column
main:33: note: def [ColumnOrName_] array(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:33: note: def [ColumnOrName_] array(__cols, Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:34: error: No overload variant of "create_map" matches argument types "List[str]", "List[str]" [call-overload]
main:34: note: Possible overload variants:
main:34: note: def create_map(*cols: Union[Column, str]) -> Column
main:34: note: def [ColumnOrName_] create_map(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:34: note: def [ColumnOrName_] create_map(__cols, Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:35: error: No overload variant of "map_concat" matches argument types "List[str]", "List[str]" [call-overload]
main:35: note: Possible overload variants:
main:35: note: def map_concat(*cols: Union[Column, str]) -> Column
main:35: note: def [ColumnOrName_] map_concat(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:35: note: def [ColumnOrName_] map_concat(__cols, Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:36: error: No overload variant of "struct" matches argument types "List[str]", "List[str]" [call-overload]
main:36: note: Possible overload variants:
main:36: note: def struct(*cols: Union[Column, str]) -> Column
main:36: note: def [ColumnOrName_] struct(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
main:36: note: def [ColumnOrName_] struct(__cols, Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column
2 changes: 1 addition & 1 deletion python/pyspark/sql/utils.py
@@ -50,7 +50,7 @@
from pyspark.sql.window import Window
from pyspark.pandas._typing import IndexOpsLike, SeriesOrIndex

has_numpy = False
has_numpy: bool = False
try:
import numpy as np # noqa: F401
