@@ -121,15 +121,38 @@ stages:
121
121
targetType : inline
122
122
script : |
123
123
echo "Download Hadoop utils for Windows."
124
- curl -k -L -o hadoop.zip https://github.com/steveloughran/winutils/releases/download/tag_2017-08-29-hadoop-2.8.1-native/hadoop-2.8.1.zip
124
+ $hadoopBinaryUrl = "https://github.com/steveloughran/winutils/releases/download/tag_2017-08-29-hadoop-2.8.1-native/hadoop-2.8.1.zip"
125
+ # The Spark 3.3.3 binary uses the Hadoop 3 dependency
126
+ if ("3.3.3" -contains "${{ test.version }}") {
127
+ $hadoopBinaryUrl = "https://github.com/SparkSnail/winutils/releases/download/hadoop-3.3.5/hadoop-3.3.5.zip"
128
+ }
129
+ curl -k -L -o hadoop.zip $hadoopBinaryUrl
125
130
Expand-Archive -Path hadoop.zip -Destination .
126
131
New-Item -ItemType Directory -Force -Path hadoop\bin
127
- cp hadoop-2.8.1\winutils.exe hadoop\bin
132
+ if ("3.3.3" -contains "${{ test.version }}") {
133
+ cp hadoop-3.3.5\winutils.exe hadoop\bin
134
+ # Hadoop 3.3 needs hadoop.dll added to the environment variables to avoid UnsatisfiedLinkError
135
+ cp hadoop-3.3.5\hadoop.dll hadoop\bin
136
+ cp hadoop-3.3.5\hadoop.dll C:\Windows\System32
137
+ [System.Environment]::SetEnvironmentVariable("PATH", $Env:Path + ";$(Build.BinariesDirectory)$(PATH_SEPARATOR)hadoop", [System.EnvironmentVariableTarget]::Machine)
138
+ } else {
139
+ cp hadoop-2.8.1\winutils.exe hadoop\bin
140
+ }
128
141
129
142
- pwsh : |
130
143
echo "Downloading Spark ${{ test.version }}"
131
- curl -k -L -o spark-${{ test.version }}.tgz https://archive.apache.org/dist/spark/spark-${{ test.version }}/spark-${{ test.version }}-bin-hadoop2.7.tgz
144
+ $sparkBinaryName = "spark-${{ test.version }}-bin-hadoop2.7"
145
+ # In Spark 3.3.0, 3.3.1, 3.3.2, and 3.3.4, the name of the binary with the Hadoop 2 dependency changed to spark-${{ test.version }}-bin-hadoop2.tgz
146
+ if ("3.3.0", "3.3.1", "3.3.2", "3.3.4" -contains "${{ test.version }}") {
147
+ $sparkBinaryName = "spark-${{ test.version }}-bin-hadoop2"
148
+ }
149
+ # Spark 3.3.3 does not provide a Hadoop 2 binary, so we use the Hadoop 3 binary
150
+ if ("3.3.3" -contains "${{ test.version }}") {
151
+ $sparkBinaryName = "spark-${{ test.version }}-bin-hadoop3"
152
+ }
153
+ curl -k -L -o spark-${{ test.version }}.tgz https://archive.apache.org/dist/spark/spark-${{ test.version }}/${sparkBinaryName}.tgz
132
154
tar xzvf spark-${{ test.version }}.tgz
155
+ move $sparkBinaryName spark-${{ test.version }}-bin-hadoop
133
156
displayName: 'Download Spark Distro ${{ test.version }}'
134
157
workingDirectory: $(Build.BinariesDirectory)
135
158
@@ -142,7 +165,7 @@ stages:
142
165
workingDirectory : $(Build.SourcesDirectory)$(PATH_SEPARATOR)dotnet-spark
143
166
env :
144
167
HADOOP_HOME : $(Build.BinariesDirectory)$(PATH_SEPARATOR)hadoop
145
- SPARK_HOME : $(Build.BinariesDirectory)$(PATH_SEPARATOR)spark-${{ test.version }}-bin-hadoop2.7
168
+ SPARK_HOME : $(Build.BinariesDirectory)$(PATH_SEPARATOR)spark-${{ test.version }}-bin-hadoop
146
169
DOTNET_WORKER_DIR : $(CURRENT_DOTNET_WORKER_DIR)
147
170
148
171
- pwsh : |
@@ -167,7 +190,7 @@ stages:
167
190
workingDirectory : $(Build.SourcesDirectory)$(PATH_SEPARATOR)dotnet-spark
168
191
env :
169
192
HADOOP_HOME : $(Build.BinariesDirectory)$(PATH_SEPARATOR)hadoop
170
- SPARK_HOME : $(Build.BinariesDirectory)$(PATH_SEPARATOR)spark-${{ test.version }}-bin-hadoop2.7
193
+ SPARK_HOME : $(Build.BinariesDirectory)$(PATH_SEPARATOR)spark-${{ test.version }}-bin-hadoop
171
194
DOTNET_WORKER_DIR : $(BACKWARD_COMPATIBLE_DOTNET_WORKER_DIR)
172
195
173
196
- checkout : forwardCompatibleRelease
@@ -189,5 +212,5 @@ stages:
189
212
workingDirectory : $(Build.SourcesDirectory)$(PATH_SEPARATOR)dotnet-spark-${{ parameters.forwardCompatibleRelease }}
190
213
env :
191
214
HADOOP_HOME : $(Build.BinariesDirectory)$(PATH_SEPARATOR)hadoop
192
- SPARK_HOME : $(Build.BinariesDirectory)$(PATH_SEPARATOR)spark-${{ test.version }}-bin-hadoop2.7
215
+ SPARK_HOME : $(Build.BinariesDirectory)$(PATH_SEPARATOR)spark-${{ test.version }}-bin-hadoop
193
216
DOTNET_WORKER_DIR : $(CURRENT_DOTNET_WORKER_DIR)
0 commit comments