这是indexloc提供的服务,不要输入任何密码
Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package com.example.items;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.Arrays;

/**
 * Demonstrates row-wise concatenation (union) of two Spark DataFrames
 * that share the same schema, using {@code unionByName}.
 */
public class ConcatRowsExample {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("Row-wise Concatenation Example")
                .master("local[*]")
                .getOrCreate();

        try {
            Dataset<Row> firstDf = peopleFrame(spark,
                    new Person(1, "Alice"),
                    new Person(2, "Bob"));

            Dataset<Row> secondDf = peopleFrame(spark,
                    new Person(3, "Charlie"),
                    new Person(4, "Diana"));

            System.out.println("First DataFrame:");
            firstDf.show();

            System.out.println("Second DataFrame:");
            secondDf.show();

            // unionByName matches columns by name rather than by position.
            Dataset<Row> combined = firstDf.unionByName(secondDf);

            System.out.println("After row-wise concatenation:");
            combined.show();
        } finally {
            // Release the local Spark context even if the demo throws.
            spark.stop();
        }
    }

    /** Builds a small DataFrame from the given {@link Person} rows. */
    private static Dataset<Row> peopleFrame(SparkSession spark, Person... people) {
        return spark.createDataFrame(Arrays.asList(people), Person.class);
    }

    /**
     * JavaBean used as the row type for {@code createDataFrame}; the no-arg
     * constructor and getter/setter pairs are what the bean-based API reads.
     */
    public static class Person implements java.io.Serializable {
        private int id;
        private String name;

        public Person() {}

        public Person(int id, String name) {
            this.id = id;
            this.name = name;
        }

        public int getId() { return id; }
        public void setId(int id) { this.id = id; }
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package com.example.items;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.*;

import java.util.Arrays;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Unit tests for row-wise DataFrame concatenation via {@code unionByName}:
 * verifies row count, schema stability, and presence of expected data.
 */
class ConcatRowsExampleUnitTest {

    private static SparkSession spark;

    private Dataset<Row> df1;
    private Dataset<Row> df2;

    @BeforeAll
    static void setupClass() {
        spark = SparkSession.builder()
                .appName("Row-wise Concatenation Test")
                .master("local[*]")
                .getOrCreate();
    }

    @AfterAll
    static void tearDownClass() {
        spark.stop();
    }

    /** Rebuilds both fixture DataFrames before each test for isolation. */
    @BeforeEach
    void setup() {
        df1 = frameOf(
                new ConcatRowsExample.Person(1, "Alice"),
                new ConcatRowsExample.Person(2, "Bob"));
        df2 = frameOf(
                new ConcatRowsExample.Person(3, "Charlie"),
                new ConcatRowsExample.Person(4, "Diana"));
    }

    /** Shorthand for building a DataFrame from Person rows. */
    private static Dataset<Row> frameOf(ConcatRowsExample.Person... people) {
        return spark.createDataFrame(Arrays.asList(people), ConcatRowsExample.Person.class);
    }

    @Test
    void testRowConcatenationCount() {
        long rows = df1.unionByName(df2).count();
        assertEquals(4, rows, "The combined DataFrame should have 4 rows");
    }

    @Test
    void testSchemaIsSame() {
        Dataset<Row> combined = df1.unionByName(df2);
        assertEquals(df1.schema(), combined.schema(), "Schema should remain consistent after concatenation");
    }

    @Test
    void testDataContainsExpectedName() {
        long matches = df1.unionByName(df2).filter("name = 'Charlie'").count();
        assertTrue(matches > 0, "Combined DataFrame should contain Charlie");
    }
}