Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- [#184](https://github.com/green-code-initiative/creedengo-java/issues/184) GCI99 - Add new Java rule, avoid CSV format, prefer Apache Parquet

### Changed

- [#119](https://github.com/green-code-initiative/creedengo-java/issues/119) GCI94 - reduce false positives: rule no longer flags `orElse()` when argument is a constant, literal, static field or null; detection extended to Optional variables (semantic type check) and to computed arguments nested inside concatenation, ternary or object instantiation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -529,4 +529,25 @@ void testGCI94() {
checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_1MIN);
}

/**
 * Integration check for GCI99 on the fixture that imports CSV libraries:
 * one issue is expected on each of lines 20 to 25, every issue starting and
 * ending on the same line.
 */
@Test
void testGCI99() {
    String ruleId = "creedengo-java:GCI99";
    String ruleMsg = "Avoid CSV format, prefer Parquet format for better performance and smaller footprint.";
    String filePath = "src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormat.java";

    // Start and end lines are kept as two separate arrays with the same content.
    int[] startLines = {20, 21, 22, 23, 24, 25};
    int[] endLines = {20, 21, 22, 23, 24, 25};

    checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_20MIN);
}

/**
 * Integration check for GCI99 on the fixture without any CSV import:
 * the expected start/end line arrays are empty, i.e. no issue is expected.
 */
@Test
void testGCI99_good() {
    String ruleId = "creedengo-java:GCI99";
    String ruleMsg = "Avoid CSV format, prefer Parquet format for better performance and smaller footprint.";
    String filePath = "src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormatNoIssue.java";

    // Empty expectations: zero issues for this file.
    int[] startLines = {};
    int[] endLines = {};

    checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_20MIN);
}
}
31 changes: 31 additions & 0 deletions src/it/test-projects/creedengo-java-plugin-test-project/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,37 @@
<artifactId>spring-beans</artifactId>
<version>5.3.25</version>
</dependency>
<!-- CSV libraries for GCI99 test cases -->
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>5.9</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.11.0</version>
</dependency>
<dependency>
<groupId>com.univocity</groupId>
<artifactId>univocity-parsers</artifactId>
<version>2.9.1</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-csv</artifactId>
<version>2.18.2</version>
</dependency>
<dependency>
<groupId>net.sf.supercsv</groupId>
<artifactId>super-csv</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>net.sf.flatpack</groupId>
<artifactId>flatpack</artifactId>
<version>4.0.15</version>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* creedengo - Java language - Provides rules to reduce the environmental footprint of your Java programs
* Copyright © 2024 Green Code Initiative (https://green-code-initiative.org/)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.greencodeinitiative.creedengo.java.checks;

import com.opencsv.CSVWriter; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}}
import org.apache.commons.csv.CSVPrinter; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}}
import com.univocity.parsers.csv.CsvParser; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}}
import com.fasterxml.jackson.dataformat.csv.CsvMapper; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}}
import org.supercsv.io.CsvBeanWriter; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}}
import net.sf.flatpack.DataSet; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}}

/**
 * Test fixture for rule GCI99. The class body is intentionally empty: the
 * expectations of this fixture are carried by the annotated import
 * statements at the top of the file.
 */
public class AvoidCSVFormat {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* creedengo - Java language - Provides rules to reduce the environmental footprint of your Java programs
* Copyright © 2024 Green Code Initiative (https://green-code-initiative.org/)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.greencodeinitiative.creedengo.java.checks;

import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
 * Compliant — uses standard I/O only, no CSV library.
 * In a real project this would use Apache Parquet or Apache Avro instead.
 */
public class AvoidCSVFormatNoIssue {

    /**
     * Writes the literal text {@code data} to the file at the given path.
     *
     * @param path location of the file to write
     * @throws IOException if the file cannot be opened or written
     */
    public void writeData(String path) throws IOException {
        try (FileOutputStream fos = new FileOutputStream(path)) {
            // Explicit charset: the no-arg getBytes() depends on the platform default encoding.
            fos.write("data".getBytes(StandardCharsets.UTF_8));
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ public class JavaCheckRegistrar implements CheckRegistrar {
FreeResourcesOfAutoCloseableInterface.class,
AvoidMultipleIfElseStatement.class,
UseOptionalOrElseGetVsOrElse.class,
MakeNonReassignedVariablesConstants.class
MakeNonReassignedVariablesConstants.class,
AvoidCSVFormat.class
);

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* creedengo - Java language - Provides rules to reduce the environmental footprint of your Java programs
* Copyright © 2024 Green Code Initiative (https://green-code-initiative.org/)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.greencodeinitiative.creedengo.java.checks;

import java.util.Collections;
import java.util.List;
import java.util.Set;

import javax.annotation.Nonnull;
import org.sonar.check.Rule;
import org.sonar.plugins.java.api.IssuableSubscriptionVisitor;
import org.sonar.plugins.java.api.tree.IdentifierTree;
import org.sonar.plugins.java.api.tree.ImportTree;
import org.sonar.plugins.java.api.tree.MemberSelectExpressionTree;
import org.sonar.plugins.java.api.tree.Tree;

/**
 * GCI99 — reports an issue on every import whose qualified name starts with
 * one of the CSV library package prefixes listed in {@link #CSV_PACKAGES}.
 */
@Rule(key = "GCI99")
public class AvoidCSVFormat extends IssuableSubscriptionVisitor {

    protected static final String MESSAGE_RULE = "Avoid CSV format, prefer Parquet format for better performance and smaller footprint.";

    /**
     * Package prefixes of the CSV libraries this rule looks for. Each prefix
     * ends with a dot so that only names inside the package match.
     */
    private static final Set<String> CSV_PACKAGES = Set.of(
        "com.opencsv.",
        "org.apache.commons.csv.",
        "com.univocity.parsers.csv.",
        "com.fasterxml.jackson.dataformat.csv.",
        "org.supercsv.",
        "net.sf.flatpack."
    );

    /**
     * Subscribes this visitor to import nodes only.
     *
     * @return a single-element list containing {@code Tree.Kind.IMPORT}
     */
    @Override
    public List<Tree.Kind> nodesToVisit() {
        return Collections.singletonList(Tree.Kind.IMPORT);
    }

    /**
     * Rebuilds the dotted name of the visited import and reports one issue on
     * the import when that name starts with a known CSV package prefix.
     *
     * @param tree the visited node, cast to {@link ImportTree}
     */
    @Override
    public void visitNode(@Nonnull Tree tree) {
        ImportTree importDeclaration = (ImportTree) tree;
        String qualifiedName = buildImportString(importDeclaration.qualifiedIdentifier());
        boolean importsCsvLibrary = CSV_PACKAGES.stream().anyMatch(qualifiedName::startsWith);
        if (importsCsvLibrary) {
            reportIssue(importDeclaration, MESSAGE_RULE);
        }
    }

    /**
     * Turns an identifier / member-select tree into its dotted textual form.
     *
     * @param tree the expression to render
     * @return the dotted name, or an empty string for any other kind of tree
     */
    private static String buildImportString(Tree tree) {
        if (tree instanceof IdentifierTree) {
            return ((IdentifierTree) tree).name();
        }
        if (!(tree instanceof MemberSelectExpressionTree)) {
            return "";
        }
        MemberSelectExpressionTree selection = (MemberSelectExpressionTree) tree;
        // Render the qualifier first, then append the selected member.
        String qualifier = buildImportString(selection.expression());
        return qualifier + "." + selection.identifier().name();
    }
}
41 changes: 41 additions & 0 deletions src/main/resources/org/green-code-initiative/rules/java/GCI99.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<div class="paragraph">
<p>Avoid using CSV format for data exchange. Prefer Apache Parquet instead.</p>
</div>
<div class="paragraph">
<p>The CSV format has several drawbacks compared to columnar binary formats like Parquet:</p>
</div>
<div class="ulist">
<ul>
<li><p><strong>Size</strong>: Parquet compresses data significantly better than plain-text CSV, reducing storage and network transfer costs.</p></li>
<li><p><strong>Read performance</strong>: Parquet supports column pruning and predicate push-down, so only the required columns and rows are read.</p></li>
<li><p><strong>Write performance</strong>: Parquet encoding (dictionary, RLE, bit-packing) makes writes faster for large datasets.</p></li>
<li><p><strong>Schema</strong>: Parquet embeds schema metadata, removing the need for fragile header-row parsing.</p></li>
</ul>
</div>
<div class="paragraph">
<p>This rule detects imports from the most popular Java CSV libraries (OpenCSV, Apache Commons CSV, Univocity Parsers, Jackson CSV, Super CSV, FlatPack).</p>
</div>
<div class="sect1">
<h2 id="_noncompliant_code_example">Noncompliant Code Example</h2>
<div class="sectionbody">
<div class="listingblock">
<div class="content">
<pre class="CodeRay highlight"><code data-lang="java">import com.opencsv.CSVWriter; // Noncompliant
import org.apache.commons.csv.CSVPrinter; // Noncompliant
import com.univocity.parsers.csv.CsvParser; // Noncompliant</code></pre>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="_compliant_solution">Compliant Solution</h2>
<div class="sectionbody">
<div class="listingblock">
<div class="content">
<pre class="CodeRay highlight"><code data-lang="java">// Use Apache Parquet, Apache Avro, or Apache ORC instead
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.avro.file.DataFileWriter;</code></pre>
</div>
</div>
</div>
</div>
16 changes: 16 additions & 0 deletions src/main/resources/org/green-code-initiative/rules/java/GCI99.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"title": "Avoid CSV format, prefer Parquet.",
"type": "CODE_SMELL",
"status": "ready",
"remediation": {
"func": "Constant\/Issue",
"constantCost": "20min"
},
"tags": [
"eco-design",
"performance",
"data",
"creedengo"
],
"defaultSeverity": "Minor"
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"GCI78",
"GCI79",
"GCI82",
"GCI94"
"GCI94",
"GCI99"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* creedengo - Java language - Provides rules to reduce the environmental footprint of your Java programs
* Copyright © 2024 Green Code Initiative (https://green-code-initiative.org/)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.greencodeinitiative.creedengo.java.checks.GCI99;

import org.greencodeinitiative.creedengo.java.checks.AvoidCSVFormat;
import org.junit.jupiter.api.Test;
import org.sonar.java.checks.verifier.CheckVerifier;

/**
 * Unit tests for {@link AvoidCSVFormat}, run against the fixture files under
 * the {@code GCI99} folder of the directory given by the
 * {@code testfiles.path} system property.
 */
class AvoidCSVFormatTest {

    /**
     * Builds the path of a GCI99 fixture; the system property is read on each call.
     */
    private static String fixturePath(String fileName) {
        return System.getProperty("testfiles.path") + "/GCI99/" + fileName;
    }

    /** The fixture importing CSV libraries must raise the annotated issues. */
    @Test
    void testHasIssues() {
        String fixture = fixturePath("AvoidCSVFormat.java");
        CheckVerifier.newVerifier()
            .onFile(fixture)
            .withCheck(new AvoidCSVFormat())
            .verifyIssues();
    }

    /** The fixture without CSV imports must raise no issue. */
    @Test
    void testNoIssues() {
        String fixture = fixturePath("AvoidCSVFormatNoIssue.java");
        CheckVerifier.newVerifier()
            .onFile(fixture)
            .withCheck(new AvoidCSVFormat())
            .verifyNoIssues();
    }

}