Build: remove Hadoop 2 dependency #12348
Changes from all commits
TestCompressionSettings.java

```diff
@@ -70,6 +70,7 @@
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
 import org.junit.AfterClass;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.Test;
```
```diff
@@ -106,6 +107,13 @@ public static void startSpark() {
     TestCompressionSettings.spark = SparkSession.builder().master("local[2]").getOrCreate();
   }

+  @Before
+  public void resetSpecificConfigurations() {
+    spark.conf().unset(COMPRESSION_CODEC);
+    spark.conf().unset(COMPRESSION_LEVEL);
+    spark.conf().unset(COMPRESSION_STRATEGY);
+  }
+
```
Comment on lines +110 to +115

Contributor: for other reviewers: the same is already being done in the Spark 3.5 version of the test and was added by #11333
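To make the motivation concrete, here is a minimal, self-contained sketch (not the actual test) of the leak the new `@Before` hook guards against: settings applied to a shared `SparkSession` persist across parameterized runs until explicitly unset. The conf key below is a stock Spark one chosen for illustration; the real test unsets its own `COMPRESSION_CODEC`, `COMPRESSION_LEVEL`, and `COMPRESSION_STRATEGY` keys.

```java
// Minimal sketch: shows how a setting on a shared SparkSession leaks into
// later runs unless unset. The conf key here is illustrative only.
import org.apache.spark.sql.SparkSession;

public class ConfLeakSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().master("local[1]").getOrCreate();

    // a previous parameterized run sets a codec...
    spark.conf().set("spark.sql.parquet.compression.codec", "zstd");

    // ...and a later run still sees it, because the session is shared
    System.out.println(spark.conf().get("spark.sql.parquet.compression.codec")); // zstd

    // unset() restores the default; this is what resetSpecificConfigurations()
    // does for the compression-related keys before every test
    spark.conf().unset("spark.sql.parquet.compression.codec");
    spark.stop();
  }
}
```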
```diff
   @Parameterized.AfterParam
   public static void clearSourceCache() {
     spark.sql(String.format("DROP TABLE IF EXISTS %s", TABLE_NAME));
```
Second file:
```diff
@@ -22,6 +22,8 @@
 import static org.assertj.core.api.Assertions.assertThatThrownBy;

 import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.List;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.iceberg.PartitionSpec;
```
```diff
@@ -118,6 +120,7 @@ public void testStreamingWriteAppendMode() throws Exception {
     // remove the last commit to force Spark to reprocess batch #1
     File lastCommitFile = new File(checkpoint + "/commits/1");
     Assert.assertTrue("The commit file must be deleted", lastCommitFile.delete());
+    Files.deleteIfExists(Paths.get(checkpoint + "/commits/.1.crc"));
```
Member: why do we need this now?

Member (Author): The .crc file is renamed along with the main file since HADOOP-16255, so deleting the main file without deleting the .crc file will result in a failure when renaming to the main file again.
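A small self-contained sketch of the cleanup pattern the tests now follow (the helper name and path are hypothetical, not code from the PR): Hadoop's local ChecksumFileSystem stores the checksum for `commits/1` in a sibling `commits/.1.crc`, and since HADOOP-16255 a rename moves both, so a stale .crc left behind would make the next rename of `1` fail.

```java
// Hypothetical helper illustrating the cleanup above; the path in main()
// is made up. Deletes a file together with its Hadoop checksum sibling,
// so that a later rename onto the same name does not trip over a stale .crc.
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class CrcCleanup {

  /** Deletes a file and its ".name.crc" checksum sibling, if either exists. */
  static void deleteWithChecksum(Path file) throws Exception {
    Files.deleteIfExists(file);
    Path crc = file.resolveSibling("." + file.getFileName() + ".crc");
    Files.deleteIfExists(crc);
  }

  public static void main(String[] args) throws Exception {
    // hypothetical checkpoint location
    deleteWithChecksum(Paths.get("/tmp/checkpoint/commits/1"));
  }
}
```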
```diff

     // restart the query from the checkpoint
     StreamingQuery restartedQuery = streamWriter.start();
```
```diff
@@ -178,6 +181,7 @@ public void testStreamingWriteCompleteMode() throws Exception {
     // remove the last commit to force Spark to reprocess batch #1
     File lastCommitFile = new File(checkpoint + "/commits/1");
     Assert.assertTrue("The commit file must be deleted", lastCommitFile.delete());
+    Files.deleteIfExists(Paths.get(checkpoint + "/commits/.1.crc"));

     // restart the query from the checkpoint
     StreamingQuery restartedQuery = streamWriter.start();
```
```diff
@@ -238,6 +242,7 @@ public void testStreamingWriteCompleteModeWithProjection() throws Exception {
     // remove the last commit to force Spark to reprocess batch #1
     File lastCommitFile = new File(checkpoint + "/commits/1");
     Assert.assertTrue("The commit file must be deleted", lastCommitFile.delete());
+    Files.deleteIfExists(Paths.get(checkpoint + "/commits/.1.crc"));

     // restart the query from the checkpoint
     StreamingQuery restartedQuery = streamWriter.start();
```
Reviewer: why is this changed?

Author: org.apache.directory.api.util.Hex is not available after switching to Hadoop 3. Also, I think it is more reasonable to use a function from Apache Commons for this purpose.
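For reference, a minimal sketch of the suggested replacement using `org.apache.commons.codec.binary.Hex` from Apache Commons Codec, assuming commons-codec is on the classpath; the class name and sample input are illustrative, and the PR's actual call site is not shown in this diff.

```java
// Minimal sketch, assuming org.apache.commons:commons-codec is available;
// illustrates replacing org.apache.directory.api.util.Hex with Commons Codec.
import java.nio.charset.StandardCharsets;
import org.apache.commons.codec.binary.Hex;

public class HexSketch {
  public static void main(String[] args) throws Exception {
    byte[] bytes = "iceberg".getBytes(StandardCharsets.UTF_8);

    // byte[] -> lowercase hex string
    String hex = Hex.encodeHexString(bytes);
    System.out.println(hex); // 69636562657267

    // round-trip back to bytes (decodeHex(String) exists since commons-codec 1.11)
    byte[] decoded = Hex.decodeHex(hex);
    System.out.println(new String(decoded, StandardCharsets.UTF_8)); // iceberg
  }
}
```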