Scala 集合
Scala 集合框架是该语言最强大的特性之一,提供了丰富的数据结构和操作方法。集合分为可变(mutable)和不可变(immutable)两大类。
集合层次结构
集合类型概览
scala
object CollectionOverview {
/**
 * Prints one sample of each default (immutable) collection, then builds the
 * mutable counterparts, adds one element to each of them and prints them again.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  import scala.collection.mutable

  // Reports a sample as "<label>: <collection>".
  def show(label: String, collection: Any): Unit = println(s"$label: $collection")

  // Immutable collections: these names are usable without any import.
  show("List", List(1, 2, 3, 4, 5))
  show("Vector", Vector(1, 2, 3, 4, 5))
  show("Set", Set(1, 2, 3, 4, 5))
  show("Map", Map("a" -> 1, "b" -> 2, "c" -> 3))

  // Mutable collections come from scala.collection.mutable.
  val buffer = mutable.ListBuffer(1, 2, 3)
  val numbers = mutable.Set(1, 2, 3)
  val lookup = mutable.Map("a" -> 1, "b" -> 2)

  def showMutable(): Unit = {
    show("Mutable List", buffer)
    show("Mutable Set", numbers)
    show("Mutable Map", lookup)
  }
  showMutable()

  // Modify each mutable collection in place.
  buffer.append(4)
  numbers.add(4)
  lookup.update("d", 4)

  println("After modification:")
  showMutable()
}
}

List(列表)
List 基础操作
scala
object ListOperations {
/**
 * Walks through the basic List API: construction, accessors, adding
 * elements and positional access. Every result is printed to stdout.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  // Five ways of constructing a List.
  val literal = List(1, 2, 3, 4, 5)
  val consed = 1 :: 2 :: 3 :: 4 :: 5 :: Nil // cons cells terminated by Nil
  val ranged = List.range(1, 6)
  val zeros = List.fill(5)(0)
  val squares = List.tabulate(5)(i => i * i)
  Seq(literal, consed, ranged, zeros, squares).zipWithIndex.foreach { case (xs, idx) =>
    println(s"list${idx + 1}: $xs")
  }

  // Basic accessors.
  val basics: Seq[(String, Any)] = Seq(
    "Head" -> literal.head,
    "Tail" -> literal.tail,
    "Last" -> literal.last,
    "Init" -> literal.init,
    "Length" -> literal.length,
    "Is empty" -> literal.isEmpty
  )
  basics.foreach { case (label, value) => println(s"$label: $value") }

  // Adding elements always yields a new List.
  val withZeroInFront = literal.::(0)       // prepend
  val withSixAtEnd = literal ::: List(6)    // append
  val joined = literal ::: List(6, 7, 8)    // concatenate
  println(s"Prepend 0: $withZeroInFront")
  println(s"Append 6: $withSixAtEnd")
  println(s"Concatenate: $joined")

  // Positional access.
  val third = literal.apply(2)
  val firstThree = literal.take(3)
  val withoutFirstTwo = literal.drop(2)
  val middle = literal.slice(1, 4)
  println(s"Element at index 2: $third")
  println(s"Take 3: $firstThree")
  println(s"Drop 2: $withoutFirstTwo")
  println(s"Slice(1, 4): $middle")
}
}

List 高阶函数
scala
object ListHigherOrderFunctions {
/**
 * Demonstrates the higher-order functions of List (map, filter, flatMap,
 * reduce/fold, scan, partition/groupBy and sorting) on a list of the
 * numbers 1..10 and a list of four words, printing every result.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  val numbers = (1 to 10).toList
  val words = List("scala", "java", "python", "javascript")

  // map: transform every element.
  val doubled = for (n <- numbers) yield n * 2
  val lengths = for (w <- words) yield w.length
  println(s"Doubled: $doubled")
  println(s"Word lengths: $lengths")

  // filter: keep the elements matching a predicate.
  val evens = for (n <- numbers if n % 2 == 0) yield n
  val longWords = for (w <- words if w.length > 4) yield w
  println(s"Even numbers: $evens")
  println(s"Long words: $longWords")

  // flatMap: map each element to a collection and flatten the results.
  val chars = words.map(_.toList).flatten
  val pairs = for (n <- numbers; v <- List(n, n * 10)) yield v
  println(s"All characters: $chars")
  println(s"Pairs: $pairs")

  // reduce and fold: collapse the list into a single value.
  val sum = numbers.reduceLeft((acc, n) => acc + n)
  val product = numbers.foldLeft(1)((acc, n) => acc * n)
  val max = numbers.reduceLeft((a, b) => a max b)
  println(s"Sum: $sum")
  println(s"Product: $product")
  println(s"Max: $max")

  // scan: like fold, but keeps every intermediate result.
  val runningSum = numbers.scanLeft(0)((acc, n) => acc + n)
  val runningProduct = numbers.scanLeft(1)((acc, n) => acc * n)
  println(s"Running sum: $runningSum")
  println(s"Running product: $runningProduct")

  // Splitting and grouping.
  val (evenPart, oddPart) = numbers.partition(n => n % 2 == 0)
  val byRemainder = numbers.groupBy(n => n % 3)
  println(s"Evens: $evenPart, Odds: $oddPart")
  println(s"Grouped by remainder: $byRemainder")

  // Sorting.
  val shuffled = List(5, 2, 8, 1, 9, 3)
  val ascending = shuffled.sorted
  val descending = shuffled.sorted(Ordering[Int].reverse)
  val wordsByLength = words.sortWith((a, b) => a.length < b.length)
  println(s"Sorted: $ascending")
  println(s"Sorted descending: $descending")
  println(s"Sort by length: $wordsByLength")
}
}

Vector(向量)
Vector 特性
scala
object VectorOperations {
/**
 * Demonstrates Vector construction, indexed access, `updated`, prepend and
 * append, then prints rough single-shot timings of indexed access and
 * prepend for a List and a Vector of the same size.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  // Construction.
  val base = Vector(1, 2, 3, 4, 5)
  val ranged = Vector.range(1, 6)
  val zeros = Vector.fill(5)(0)
  println(s"vector1: $base")
  println(s"vector2: $ranged")
  println(s"vector3: $zeros")

  // Indexed access into a large Vector.
  val big = Vector.range(1, 1000000)
  val picked = big(500000)
  println(s"Element at 500000: $picked")

  // `updated` returns a new Vector; the original is printed to show it is untouched.
  val patched = base.updated(2, 99)
  println(s"Original: $base")
  println(s"Updated: $patched")

  // Adding elements at either end.
  val withFront = 0 +: base
  val withBack = base :+ 6
  println(s"Prepended: $withFront")
  println(s"Appended: $withBack")

  // Evaluates the by-name block once and returns the elapsed nanoseconds.
  def timeOperation[T](operation: => T): Long = {
    val startedAt = System.nanoTime()
    operation
    System.nanoTime() - startedAt
  }

  val size = 100000
  val list = List.range(1, size)
  val vector = Vector.range(1, size)
  val middle = size / 2

  // Indexed access timing.
  val listAccessNanos = timeOperation(list(middle))
  val vectorAccessNanos = timeOperation(vector(middle))
  println(s"List random access: ${listAccessNanos}ns")
  println(s"Vector random access: ${vectorAccessNanos}ns")

  // Prepend timing.
  val listPrependNanos = timeOperation(0 :: list)
  val vectorPrependNanos = timeOperation(0 +: vector)
  println(s"List prepend: ${listPrependNanos}ns")
  println(s"Vector prepend: ${vectorPrependNanos}ns")
}
}

Set(集合)
Set 操作
scala
object SetOperations {
/**
 * Demonstrates the Set API: construction (with automatic de-duplication),
 * membership and size queries, adding/removing elements, set algebra,
 * subset/superset checks, a mutable Set and a SortedSet. Every result is
 * printed to stdout.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  // Construction.
  val set1 = Set(1, 2, 3, 4, 5)
  val set2 = Set(4, 5, 6, 7, 8)
  val set3 = Set(1, 1, 2, 2, 3, 3) // duplicates are dropped
  println(s"set1: $set1")
  println(s"set2: $set2")
  println(s"set3 (duplicates removed): $set3")

  // Basic queries.
  println(s"Contains 3: ${set1.contains(3)}")
  println(s"Size: ${set1.size}")
  println(s"Is empty: ${set1.isEmpty}")

  // Adding and removing elements (each returns a new Set).
  val added = set1 + 6
  val removed = set1 - 3
  val multipleAdded = set1 ++ Set(6, 7, 8)
  val multipleRemoved = set1 -- Set(1, 2)
  println(s"Added 6: $added")
  println(s"Removed 3: $removed")
  println(s"Multiple added: $multipleAdded")
  println(s"Multiple removed: $multipleRemoved")

  // Set algebra.
  val union = set1 union set2            // or set1 | set2
  val intersection = set1 intersect set2 // or set1 & set2
  val difference = set1 diff set2        // or set1 &~ set2
  println(s"Union: $union")
  println(s"Intersection: $intersection")
  println(s"Difference: $difference")

  // Subset and superset.
  val subset = Set(1, 2, 3)
  println(s"$subset is subset of $set1: ${subset.subsetOf(set1)}")
  // "set1 is a superset of subset" holds exactly when subset is a subset of set1,
  // so the check must be made on `subset`, not on `set1`.
  println(s"$set1 is superset of $subset: ${subset.subsetOf(set1)}")

  // Mutable Set: modified in place.
  import scala.collection.mutable
  val mutableSet = mutable.Set(1, 2, 3)
  mutableSet += 4
  mutableSet -= 1
  println(s"Mutable set: $mutableSet")

  // SortedSet keeps its elements in order.
  import scala.collection.immutable.SortedSet
  val sortedSet = SortedSet(5, 1, 3, 2, 4)
  println(s"Sorted set: $sortedSet")
}
}

Map(映射)
Map 基础操作
scala
object MapOperations {
/**
 * Demonstrates the basic immutable Map API: construction, lookups, key
 * checks, adding/removing entries, key/value views and map/filter. Every
 * result is printed to stdout.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  // Construction: arrow syntax, explicit tuples, and an empty typed Map.
  val map1 = Map("a" -> 1, "b" -> 2, "c" -> 3)
  val map2 = Map(("x", 10), ("y", 20), ("z", 30))
  val map3 = Map.empty[String, Int]
  println(s"map1: $map1")
  println(s"map2: $map2")
  println(s"map3: $map3")

  // Lookups: direct apply, Option-returning get, and get with a fallback.
  val direct = map1.apply("a")
  val present = map1.get("a")
  val absent = map1.get("d")
  val fallback = map1.getOrElse("d", 0)
  println(s"Value for 'a': $direct")
  println(s"Get 'a': $present")
  println(s"Get 'd': $absent")
  println(s"Get 'd' with default: $fallback")

  // Key existence.
  val hasB = map1.contains("b")
  val hasD = map1.contains("d")
  println(s"Contains 'b': $hasB")
  println(s"Contains 'd': $hasD")

  // Adding and removing entries (each returns a new Map).
  val updated = map1.updated("d", 4)
  val multipleUpdated = map1 ++ Map("d" -> 4, "e" -> 5)
  val removed = map1 - "a"
  val multipleRemoved = map1 -- List("a", "b")
  println(s"Updated: $updated")
  println(s"Multiple updated: $multipleUpdated")
  println(s"Removed: $removed")
  println(s"Multiple removed: $multipleRemoved")

  // Keys, values and entries.
  println(s"Keys: ${map1.keys}")
  println(s"Values: ${map1.values}")
  println(s"Key-value pairs: ${map1.toList}")

  // Transforming and filtering entries.
  val doubled = for ((key, value) <- map1) yield key -> (value * 2)
  val filtered = map1.filter(entry => entry._2 > 1)
  println(s"Doubled values: $doubled")
  println(s"Filtered (value > 1): $filtered")
}
}

Map 高级操作
scala
object AdvancedMapOperations {
/**
 * Demonstrates more involved Map usage on a name -> score map: max/min/average,
 * grouping into letter grades, merging in bonus points, nested maps and
 * in-place edits on a mutable Map. Every result is printed to stdout.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  val scores = Map("Alice" -> 95, "Bob" -> 87, "Charlie" -> 92, "Diana" -> 78)

  // Lookups by score.
  val topStudent = scores.maxBy { case (_, score) => score }
  val bottomStudent = scores.minBy { case (_, score) => score }
  val averageScore = scores.values.sum.toDouble / scores.size
  println(s"Top student: $topStudent")
  println(s"Bottom student: $bottomStudent")
  println(s"Average score: $averageScore")

  // Grouping: 90+ is "A", 80+ is "B", 70+ is "C", anything lower is "F".
  def letterGrade(score: Int): String =
    if (score >= 90) "A"
    else if (score >= 80) "B"
    else if (score >= 70) "C"
    else "F"

  val gradeRanges = scores.groupBy { case (_, score) => letterGrade(score) }
  println("Grade distribution:")
  for ((grade, students) <- gradeRanges) {
    val names = students.keys.mkString(", ")
    println(s"Grade $grade: $names")
  }

  // Merging: each bonus is added to the existing score, or to 0 for a new name.
  val bonusPoints = Map("Alice" -> 5, "Bob" -> 3, "Eve" -> 10)
  val adjusted = bonusPoints.map { case (name, bonus) =>
    name -> (scores.getOrElse(name, 0) + bonus)
  }
  val finalScores = scores ++ adjusted
  println(s"Final scores: $finalScores")

  // Nested Map.
  val studentData = Map(
    "Alice" -> Map("age" -> 20, "grade" -> 95, "year" -> 3),
    "Bob" -> Map("age" -> 19, "grade" -> 87, "year" -> 2)
  )
  val aliceAge = studentData("Alice")("age")
  println(s"Alice's age: $aliceAge")

  // Mutable Map: edited in place.
  import scala.collection.mutable
  val mutableScores = mutable.Map("Alice" -> 95, "Bob" -> 87)
  mutableScores.update("Charlie", 92) // insert a new entry
  mutableScores.update("Alice", 98)   // overwrite an existing entry
  mutableScores -= "Bob"              // delete an entry
  println(s"Mutable scores: $mutableScores")
}
}

集合转换
集合间转换
scala
object CollectionConversions {
/**
 * Prints the result of converting between collection types, starting from
 * a List, an Array, a Set, a Map, a String and a Range.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  // Reports one conversion as "<label>: <result>".
  def show(label: String, result: Any): Unit = println(s"$label: $result")

  val list = List(1, 2, 3, 4, 5, 2, 3)
  val array = Array(1, 2, 3, 4, 5)
  val set = Set(1, 2, 3, 4, 5)
  val map = Map("a" -> 1, "b" -> 2, "c" -> 3)

  // From List.
  println("From List:")
  show("List to Vector", list.toVector)
  show("List to Set", list.toSet) // duplicates are dropped
  show("List to Array", list.toArray.mkString(", "))

  // From Array.
  println("\nFrom Array:")
  show("Array to List", array.toList)
  show("Array to Vector", array.toVector)
  show("Array to Set", array.toSet)

  // From Set.
  println("\nFrom Set:")
  show("Set to List", set.toList)
  show("Set to Vector", set.toVector)
  show("Set to Array", set.toArray.mkString(", "))

  // From Map.
  println("\nFrom Map:")
  show("Map to List", map.toList)
  show("Map keys to Set", map.keySet)
  show("Map values to List", map.values.toList)

  // From String: converted character by character.
  val string = "hello"
  show("\nString to List", string.toList)
  show("String to Vector", string.toVector)
  show("String to Set", string.toSet)

  // From Range.
  val range = 1 to 10
  show("\nRange to List", range.toList)
  show("Range to Vector", range.toVector)
  show("Range to Set", range.toSet)
}
}

集合性能比较
性能特性
scala
object CollectionPerformance {
/**
 * Prints single-shot timings (in whole milliseconds) for creating, indexing,
 * prepending to, appending to and summing a List, a Vector and an Array,
 * followed by a short list of usage guidelines.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  val size = 100000

  // Evaluates the by-name block once, prints its label (right-aligned to 20
  // columns) with the elapsed milliseconds, and returns the block's result.
  def timeOperation[T](name: String)(operation: => T): T = {
    val startedAt = System.nanoTime()
    val outcome = operation
    val elapsedMs = (System.nanoTime() - startedAt) / 1000000
    println(f"$name%20s: $elapsedMs%6d ms")
    outcome
  }

  println("Collection Performance Comparison:")
  println("=" * 50)

  // Creation.
  println("\nCreation Performance:")
  val list = timeOperation("List creation")(List.range(1, size))
  val vector = timeOperation("Vector creation")(Vector.range(1, size))
  val array = timeOperation("Array creation")(Array.range(1, size))

  // Indexed access at the midpoint.
  println("\nRandom Access Performance:")
  val index = size / 2
  timeOperation("List access")(list(index))
  timeOperation("Vector access")(vector(index))
  timeOperation("Array access")(array(index))

  // Prepend.
  println("\nPrepend Performance:")
  timeOperation("List prepend")(0 :: list)
  timeOperation("Vector prepend")(0 +: vector)

  // Append.
  println("\nAppend Performance:")
  timeOperation("List append")(list :+ (size + 1))
  timeOperation("Vector append")(vector :+ (size + 1))

  // Full traversal.
  println("\nIteration Performance:")
  timeOperation("List sum")(list.sum)
  timeOperation("Vector sum")(vector.sum)
  timeOperation("Array sum")(array.sum)

  // Usage guidelines.
  println("\nMemory and Performance Guidelines:")
  Seq(
    "List: Best for sequential access, prepending",
    "Vector: Best for random access, general purpose",
    "Array: Best for performance-critical code, interop with Java",
    "Set: Best for membership testing, uniqueness",
    "Map: Best for key-value lookups"
  ).foreach(println)
}
}

实际应用示例
数据处理管道
scala
object DataProcessingPipeline {
/**
 * A single person record.
 *
 * @param name   the person's name
 * @param age    the person's age
 * @param city   the city associated with the person
 * @param salary the person's salary
 */
case class Person(
  name: String,
  age: Int,
  city: String,
  salary: Double
)
/**
 * Runs a small data-processing pipeline over a hard-coded list of people:
 * prints the raw records, per-city statistics for people aged 28 or older,
 * overall salary statistics (average, median, min, max) and a grouping of
 * names by age bracket.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  val people = List(
    Person("Alice", 25, "New York", 75000),
    Person("Bob", 30, "San Francisco", 95000),
    Person("Charlie", 35, "New York", 85000),
    Person("Diana", 28, "Boston", 70000),
    Person("Eve", 32, "San Francisco", 105000),
    Person("Frank", 29, "Boston", 68000)
  )
  println("Original data:")
  people.foreach(println)

  // Pipeline: keep people aged 28+, group them by city, then reduce each
  // group to a map of named metrics.
  // Every metric, the count included, is stored as a Double. Mixing Int and
  // Double values would widen the value type to AnyVal, which the %.2f
  // conversion used for printing below does not accept.
  val analysis: Map[String, Map[String, Double]] = people
    .filter(_.age >= 28)
    .groupBy(_.city)
    .view
    .mapValues { cityPeople =>
      val headcount = cityPeople.size
      val totalSalary = cityPeople.map(_.salary).sum
      Map(
        "count" -> headcount.toDouble,
        "avgAge" -> cityPeople.map(_.age).sum.toDouble / headcount,
        "avgSalary" -> totalSalary / headcount,
        "totalSalary" -> totalSalary
      )
    }
    .toMap

  println("\nAnalysis by city (age >= 28):")
  analysis.foreach { case (city, stats) =>
    println(s"$city:")
    stats.foreach { case (metric, value) =>
      println(f" $metric: $value%.2f")
    }
  }

  // Salary statistics.
  val salaryStats = people.map(_.salary)
  val sortedSalaries = salaryStats.sorted
  val mid = sortedSalaries.length / 2
  // Even number of salaries: average the two middle values; odd: take the middle one.
  val median =
    if (sortedSalaries.length % 2 == 0) (sortedSalaries(mid - 1) + sortedSalaries(mid)) / 2
    else sortedSalaries(mid)
  println("\nSalary Statistics:")
  println(f"Average: ${salaryStats.sum / salaryStats.length}%.2f")
  println(f"Median: $median%.2f")
  println(f"Min: ${salaryStats.min}%.2f")
  println(f"Max: ${salaryStats.max}%.2f")

  // Age distribution: under 30 -> "20s", under 40 -> "30s", otherwise "40+".
  val ageGroups = people.groupBy { person =>
    person.age match {
      case age if age < 30 => "20s"
      case age if age < 40 => "30s"
      case _ => "40+"
    }
  }
  println("\nAge Distribution:")
  // The group members are named `members` so they do not shadow the outer `people`.
  ageGroups.foreach { case (group, members) =>
    println(s"$group: ${members.map(_.name).mkString(", ")}")
  }
}
}

缓存系统
scala
import scala.collection.mutable
object CacheSystem {
/**
 * A least-recently-used cache backed by an insertion-ordered
 * `mutable.LinkedHashMap`: the first entry is the least recently used one
 * and the last entry is the most recently used one.
 *
 * @param maxSize number of entries above which `put` evicts the oldest entry
 * @tparam K key type
 * @tparam V value type
 */
class LRUCache[K, V](maxSize: Int) {
  private val entries = mutable.LinkedHashMap[K, V]()

  /**
   * Looks up `key`. On a hit the entry is removed and re-inserted so that it
   * becomes the most recently used one.
   *
   * @return `Some(value)` on a hit, `None` on a miss
   */
  def get(key: K): Option[V] = {
    val hit = entries.remove(key)
    hit.foreach(value => entries.put(key, value))
    hit
  }

  /**
   * Stores `value` under `key` as the most recently used entry. If the cache
   * then holds more than `maxSize` entries, the first (oldest) entry is removed.
   */
  def put(key: K, value: V): Unit = {
    entries -= key // drop any existing entry so the key moves to the end
    entries += (key -> value)
    if (entries.size > maxSize) entries -= entries.head._1
  }

  /** Number of entries currently held. */
  def size: Int = entries.size

  /** Immutable snapshot of the keys currently held. */
  def keys: Set[K] = entries.keySet.toSet

  /** String form of the underlying map. */
  override def toString: String = entries.toString()
}
/**
 * Exercises an LRUCache of capacity 3: fills it, touches one key, adds a
 * fourth key to trigger an eviction, then replays a list of requests and
 * prints the number of hits and misses and the hit rate.
 *
 * @param args command-line arguments (ignored)
 */
def main(args: Array[String]): Unit = {
  val cache = new LRUCache[String, Int](3)

  // Fill the cache.
  Seq("a" -> 1, "b" -> 2, "c" -> 3).foreach { case (key, value) => cache.put(key, value) }
  println(s"After adding a, b, c: $cache")

  // Read one key.
  val firstLookup = cache.get("a")
  println(s"Get 'a': $firstLookup")
  println(s"After accessing 'a': $cache")

  // Adding a fourth key should evict the oldest entry, 'b'.
  cache.put("d", 4)
  println(s"After adding 'd': $cache")

  // Replay requests in order; a miss inserts the key with its hash code as value.
  val requests = List("a", "b", "c", "d", "a", "e", "f", "a")
  val outcomes = requests.map { key =>
    val found = cache.get(key).isDefined
    if (!found) cache.put(key, key.hashCode)
    found
  }
  val hits = outcomes.count(identity)
  val misses = outcomes.size - hits

  println("\nCache performance:")
  println(s"Hits: $hits, Misses: $misses")
  val hitRate = hits.toDouble / (hits + misses) * 100
  println(s"Hit rate: $hitRate%")
}
}

最佳实践
选择合适的集合类型:
- 需要顺序访问:List
- 需要随机访问:Vector
- 需要唯一性:Set
- 需要键值映射:Map
不可变 vs 可变:
- 默认使用不可变集合
- 性能关键场景考虑可变集合
- 函数式编程优先不可变
性能考虑:
- List 适合前置操作
- Vector 适合随机访问
- Array 适合性能关键场景
内存效率:
- 使用 view 进行惰性计算
- 避免不必要的中间集合
- 考虑使用 Iterator 处理大数据
函数式编程:
- 使用 map、filter、reduce 等高阶函数
- 链式操作提高可读性
- 避免副作用
Scala 集合框架提供了强大而灵活的数据处理能力,掌握这些集合类型和操作方法是编写高效 Scala 程序的关键。