package com.baeldung.sparkdataframeconcat;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.Arrays;

/**
 * Demonstrates row-wise concatenation of two Spark DataFrames with
 * identical schemas using {@link Dataset#unionByName(Dataset)}, which
 * matches columns by name rather than by position.
 */
public class ConcatRowsExample {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("Row-wise Concatenation Example")
            .master("local[*]")
            .getOrCreate();

        try {
            // First DataFrame, built from a Java bean via the bean encoder
            Dataset<Row> df1 = spark.createDataFrame(
                Arrays.asList(
                    new Person(1, "Alice"),
                    new Person(2, "Bob")
                ),
                Person.class
            );

            // Second DataFrame with the same schema
            Dataset<Row> df2 = spark.createDataFrame(
                Arrays.asList(
                    new Person(3, "Charlie"),
                    new Person(4, "Diana")
                ),
                Person.class
            );

            System.out.println("First DataFrame:");
            df1.show();

            System.out.println("Second DataFrame:");
            df2.show();

            // Row-wise concatenation: unionByName aligns columns by name,
            // so it is safer than union() if column order ever differs
            Dataset<Row> combined = df1.unionByName(df2);

            System.out.println("After row-wise concatenation:");
            combined.show();
        } finally {
            // Always release the local Spark context, even on failure
            spark.stop();
        }
    }

    /**
     * Simple JavaBean used as the row type. Spark's bean encoder requires
     * a public no-arg constructor plus getters/setters, and the class must
     * be {@link java.io.Serializable} to cross executor boundaries.
     */
    public static class Person implements java.io.Serializable {
        private int id;
        private String name;

        /** No-arg constructor required by Spark's bean encoder. */
        public Person() {}

        public Person(int id, String name) {
            this.id = id;
            this.name = name;
        }

        public int getId() { return id; }
        public void setId(int id) { this.id = id; }
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
    }
}
package com.baeldung.sparkdataframeconcat;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.util.Arrays;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Unit tests for row-wise DataFrame concatenation via
 * {@link Dataset#unionByName(Dataset)}. A single local SparkSession is
 * shared across tests because session startup is expensive.
 */
class ConcatRowsExampleUnitTest {

    private static SparkSession spark;
    private Dataset<Row> df1;
    private Dataset<Row> df2;

    @BeforeAll
    static void setupClass() {
        spark = SparkSession.builder()
            .appName("Row-wise Concatenation Test")
            .master("local[*]")
            .getOrCreate();
    }

    @BeforeEach
    void setup() {
        // Fresh input DataFrames per test so mutations cannot leak between tests
        df1 = spark.createDataFrame(
            Arrays.asList(
                new ConcatRowsExample.Person(1, "Alice"),
                new ConcatRowsExample.Person(2, "Bob")
            ),
            ConcatRowsExample.Person.class
        );

        df2 = spark.createDataFrame(
            Arrays.asList(
                new ConcatRowsExample.Person(3, "Charlie"),
                new ConcatRowsExample.Person(4, "Diana")
            ),
            ConcatRowsExample.Person.class
        );
    }

    @AfterAll
    static void tearDownClass() {
        spark.stop();
    }

    @Test
    void whenUnionByName_thenRowCountIsSumOfInputs() {
        Dataset<Row> combined = df1.unionByName(df2);
        assertEquals(4, combined.count(), "The combined DataFrame should have 4 rows");
    }

    @Test
    void whenUnionByName_thenSchemaIsUnchanged() {
        Dataset<Row> combined = df1.unionByName(df2);
        assertEquals(df1.schema(), combined.schema(), "Schema should remain consistent after concatenation");
    }

    @Test
    void whenUnionByName_thenRowsFromSecondFramePresent() {
        Dataset<Row> combined = df1.unionByName(df2);
        assertTrue(
            combined.filter("name = 'Charlie'").count() > 0,
            "Combined DataFrame should contain Charlie"
        );
    }
}