Skip to content

Scala 集合

Scala 集合框架是该语言最强大的特性之一,提供了丰富的数据结构和操作方法。集合分为可变(mutable)和不可变(immutable)两大类。

集合层次结构

集合类型概览

scala
/** Prints one sample of each core immutable collection, then shows in-place
  * modification of their `scala.collection.mutable` counterparts.
  */
object CollectionOverview {
  def main(args: Array[String]): Unit = {
    import scala.collection.mutable

    // Immutable collections: available without any import.
    val numberList = List(1, 2, 3, 4, 5)
    val numberVector = Vector(1, 2, 3, 4, 5)
    val numberSet = Set(1, 2, 3, 4, 5)
    val letterIndex = Map("a" -> 1, "b" -> 2, "c" -> 3)

    Seq(
      s"List: $numberList",
      s"Vector: $numberVector",
      s"Set: $numberSet",
      s"Map: $letterIndex"
    ).foreach(println)

    // Mutable collections: referenced through the `mutable` package prefix.
    val buffer = mutable.ListBuffer(1, 2, 3)
    val growableSet = mutable.Set(1, 2, 3)
    val growableMap = mutable.Map("a" -> 1, "b" -> 2)

    // Prints the current contents of the three mutable collections.
    def printMutableState(): Unit = {
      println(s"Mutable List: $buffer")
      println(s"Mutable Set: $growableSet")
      println(s"Mutable Map: $growableMap")
    }

    printMutableState()

    // `+=` updates each collection in place.
    buffer += 4
    growableSet += 4
    growableMap += ("d" -> 4)

    println("After modification:")
    printMutableState()
  }
}

List(列表)

List 基础操作

scala
/** Walks through building an immutable `List` and its basic accessors,
  * element addition, and positional access.
  */
object ListOperations {
  def main(args: Array[String]): Unit = {
    // Several ways to construct a List.
    val literal = List(1, 2, 3, 4, 5)
    val consChain = 1 :: 2 :: 3 :: 4 :: 5 :: Nil
    val ranged = List.range(1, 6)
    val zeros = List.fill(5)(0)
    val squares = List.tabulate(5)(i => i * i)

    // Labels are 1-based: list1 .. list5.
    List(literal, consChain, ranged, zeros, squares).zipWithIndex.foreach {
      case (xs, idx) => println(s"list${idx + 1}: $xs")
    }

    // Basic accessors.
    Seq(
      s"Head: ${literal.head}",
      s"Tail: ${literal.tail}",
      s"Last: ${literal.last}",
      s"Init: ${literal.init}",
      s"Length: ${literal.length}",
      s"Is empty: ${literal.isEmpty}"
    ).foreach(println)

    // Adding elements always yields a new list; `literal` is untouched.
    println(s"Prepend 0: ${0 :: literal}")
    println(s"Append 6: ${literal :+ 6}")
    println(s"Concatenate: ${literal ++ List(6, 7, 8)}")

    // Positional access and sub-lists.
    Seq(
      s"Element at index 2: ${literal(2)}",
      s"Take 3: ${literal.take(3)}",
      s"Drop 2: ${literal.drop(2)}",
      s"Slice(1, 4): ${literal.slice(1, 4)}"
    ).foreach(println)
  }
}

List 高阶函数

scala
/** Demonstrates the common higher-order functions on `List`:
  * map, filter, flatMap, reduce/fold, scanLeft, partition/groupBy and sorting.
  */
object ListHigherOrderFunctions {
  def main(args: Array[String]): Unit = {
    val numbers = List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val words = List("scala", "java", "python", "javascript")

    // map: transform every element.
    println(s"Doubled: ${numbers.map(n => n * 2)}")
    println(s"Word lengths: ${words.map(word => word.length)}")

    // filter: keep the elements that satisfy a predicate.
    println(s"Even numbers: ${numbers.filter(n => n % 2 == 0)}")
    println(s"Long words: ${words.filter(word => word.length > 4)}")

    // flatMap: map each element to a collection and flatten the results.
    println(s"All characters: ${words.flatMap(word => word.toList)}")
    println(s"Pairs: ${numbers.flatMap(n => List(n, n * 10))}")

    // reduce / fold: collapse the list into a single value.
    val total = numbers.reduce((a, b) => a + b)
    val factorial = numbers.fold(1)((acc, n) => acc * n)
    val largest = numbers.reduce((a, b) => a max b)
    println(s"Sum: $total")
    println(s"Product: $factorial")
    println(s"Max: $largest")

    // scanLeft: like fold, but keeps every intermediate accumulator.
    println(s"Running sum: ${numbers.scanLeft(0)((acc, n) => acc + n)}")
    println(s"Running product: ${numbers.scanLeft(1)((acc, n) => acc * n)}")

    // partition splits in two by predicate; groupBy buckets by a key.
    val (evenPart, oddPart) = numbers.partition(n => n % 2 == 0)
    val byRemainder = numbers.groupBy(n => n % 3)
    println(s"Evens: $evenPart, Odds: $oddPart")
    println(s"Grouped by remainder: $byRemainder")

    // Sorting: natural order, custom comparator, and by derived key.
    val unsorted = List(5, 2, 8, 1, 9, 3)
    println(s"Sorted: ${unsorted.sorted}")
    println(s"Sorted descending: ${unsorted.sortWith((a, b) => a > b)}")
    println(s"Sort by length: ${words.sortBy(word => word.length)}")
  }
}

Vector(向量)

Vector 特性

scala
/** Shows `Vector` construction, indexed access, functional update and a
  * rough single-shot timing comparison against `List`.
  */
object VectorOperations {
  def main(args: Array[String]): Unit = {
    // Construction.
    val literal = Vector(1, 2, 3, 4, 5)
    println(s"vector1: $literal")
    println(s"vector2: ${Vector.range(1, 6)}")
    println(s"vector3: ${Vector.fill(5)(0)}")

    // Indexed access into a large vector.
    val bigVector = Vector.range(1, 1000000)
    println(s"Element at 500000: ${bigVector(500000)}")

    // `updated` returns a new Vector; the original is left unchanged.
    val patched = literal.updated(2, 99)
    println(s"Original: $literal")
    println(s"Updated: $patched")

    // Adding at either end.
    println(s"Prepended: ${0 +: literal}")
    println(s"Appended: ${literal :+ 6}")

    // Runs `body` once and returns the elapsed wall-clock time in nanoseconds.
    def elapsedNanos[T](body: => T): Long = {
      val startedAt = System.nanoTime()
      body
      val finishedAt = System.nanoTime()
      finishedAt - startedAt
    }

    val elementCount = 100000
    val sampleList = List.range(1, elementCount)
    val sampleVector = Vector.range(1, elementCount)
    val middle = elementCount / 2

    // Access to the middle element.
    val listAccess = elapsedNanos(sampleList(middle))
    val vectorAccess = elapsedNanos(sampleVector(middle))
    println(s"List random access: ${listAccess}ns")
    println(s"Vector random access: ${vectorAccess}ns")

    // Prepending one element.
    val listPrepend = elapsedNanos(0 :: sampleList)
    val vectorPrepend = elapsedNanos(0 +: sampleVector)
    println(s"List prepend: ${listPrepend}ns")
    println(s"Vector prepend: ${vectorPrepend}ns")
  }
}

Set(集合)

Set 操作

scala
/** Demonstrates immutable `Set` creation, membership, element addition and
  * removal, set algebra, subset/superset checks, plus a mutable set and a
  * `SortedSet`.
  */
object SetOperations {
  def main(args: Array[String]): Unit = {
    // Set creation.
    val set1 = Set(1, 2, 3, 4, 5)
    val set2 = Set(4, 5, 6, 7, 8)
    val set3 = Set(1, 1, 2, 2, 3, 3)  // duplicates are dropped

    println(s"set1: $set1")
    println(s"set2: $set2")
    println(s"set3 (duplicates removed): $set3")

    // Basic queries.
    println(s"Contains 3: ${set1.contains(3)}")
    println(s"Size: ${set1.size}")
    println(s"Is empty: ${set1.isEmpty}")

    // Adding and removing elements (each returns a new set).
    val added = set1 + 6
    val removed = set1 - 3
    val multipleAdded = set1 ++ Set(6, 7, 8)
    val multipleRemoved = set1 -- Set(1, 2)

    println(s"Added 6: $added")
    println(s"Removed 3: $removed")
    println(s"Multiple added: $multipleAdded")
    println(s"Multiple removed: $multipleRemoved")

    // Set algebra.
    val union = set1 union set2  // or set1 | set2
    val intersection = set1 intersect set2  // or set1 & set2
    val difference = set1 diff set2  // or set1 &~ set2

    println(s"Union: $union")
    println(s"Intersection: $intersection")
    println(s"Difference: $difference")

    // Subset and superset.
    val subset = Set(1, 2, 3)
    println(s"$subset is subset of $set1: ${subset.subsetOf(set1)}")
    // Set has no `supersetOf`; "A is a superset of B" is exactly "B is a
    // subset of A", so the receiver and argument are swapped here.
    println(s"$set1 is superset of $subset: ${subset.subsetOf(set1)}")

    // Other kinds of Set.
    import scala.collection.mutable
    val mutableSet = mutable.Set(1, 2, 3)
    mutableSet += 4
    mutableSet -= 1
    println(s"Mutable set: $mutableSet")

    // SortedSet keeps its elements in sorted order.
    import scala.collection.immutable.SortedSet
    val sortedSet = SortedSet(5, 1, 3, 2, 4)
    println(s"Sorted set: $sortedSet")
  }
}

Map(映射)

Map 基础操作

scala
/** Demonstrates basic immutable `Map` usage: creation, lookup, key checks,
  * adding/removing entries, key/value views, and map/filter transformations.
  */
object MapOperations {
  def main(args: Array[String]): Unit = {
    // Creation: arrow syntax, explicit tuples, and an empty typed map.
    val letters = Map("a" -> 1, "b" -> 2, "c" -> 3)
    val coordinates = Map(("x", 10), ("y", 20), ("z", 30))
    val nothing = Map.empty[String, Int]

    println(s"map1: $letters")
    println(s"map2: $coordinates")
    println(s"map3: $nothing")

    // Lookup: direct apply, Option-returning get, and get with a fallback.
    println(s"Value for 'a': ${letters("a")}")
    println(s"Get 'a': ${letters.get("a")}")
    println(s"Get 'd': ${letters.get("d")}")
    println(s"Get 'd' with default: ${letters.getOrElse("d", 0)}")

    // Key existence.
    println(s"Contains 'b': ${letters.contains("b")}")
    println(s"Contains 'd': ${letters.contains("d")}")

    // Adding and removing entries; each operation yields a new map.
    val withD = letters.updated("d", 4)
    val withDAndE = letters ++ Map("d" -> 4, "e" -> 5)
    val withoutA = letters - "a"
    val withoutAAndB = letters -- List("a", "b")

    println(s"Updated: $withD")
    println(s"Multiple updated: $withDAndE")
    println(s"Removed: $withoutA")
    println(s"Multiple removed: $withoutAAndB")

    // Keys, values, and entries as a list of pairs.
    println(s"Keys: ${letters.keys}")
    println(s"Values: ${letters.values}")
    println(s"Key-value pairs: ${letters.toList}")

    // Transformations over entries.
    val doubledValues = letters.map(entry => entry._1 -> (entry._2 * 2))
    val aboveOne = letters.filter(entry => entry._2 > 1)

    println(s"Doubled values: $doubledValues")
    println(s"Filtered (value > 1): $aboveOne")
  }
}

Map 高级操作

scala
/** Demonstrates aggregate queries, grouping, merging, nesting and in-place
  * mutation on maps, using a small table of student scores.
  */
object AdvancedMapOperations {
  def main(args: Array[String]): Unit = {
    val scores = Map(
      "Alice" -> 95,
      "Bob" -> 87,
      "Charlie" -> 92,
      "Diana" -> 78
    )

    // Aggregate queries over the entries.
    val best = scores.maxBy(entry => entry._2)
    val worst = scores.minBy(entry => entry._2)
    val mean = scores.values.sum.toDouble / scores.size

    println(s"Top student: $best")
    println(s"Bottom student: $worst")
    println(s"Average score: $mean")

    // Maps a numeric score to its letter grade.
    def letterGrade(score: Int): String =
      if (score >= 90) "A"
      else if (score >= 80) "B"
      else if (score >= 70) "C"
      else "F"

    // Group the entries by letter grade.
    val byGrade = scores.groupBy(entry => letterGrade(entry._2))

    println("Grade distribution:")
    for ((grade, students) <- byGrade) {
      println(s"Grade $grade: ${students.keys.mkString(", ")}")
    }

    // Merge: add each bonus to the existing score (0 when the name is new),
    // then overlay the adjusted entries on the original scores.
    val bonusPoints = Map("Alice" -> 5, "Bob" -> 3, "Eve" -> 10)
    val adjusted = bonusPoints.map { entry =>
      val (name, bonus) = entry
      name -> (scores.getOrElse(name, 0) + bonus)
    }
    val finalScores = scores ++ adjusted

    println(s"Final scores: $finalScores")

    // Nested maps: student name -> attribute name -> value.
    val studentData = Map(
      "Alice" -> Map("age" -> 20, "grade" -> 95, "year" -> 3),
      "Bob" -> Map("age" -> 19, "grade" -> 87, "year" -> 2)
    )
    val aliceAge = studentData("Alice")("age")

    println(s"Alice's age: $aliceAge")

    // Mutable map: insert, overwrite, and delete in place.
    import scala.collection.mutable
    val liveScores = mutable.Map("Alice" -> 95, "Bob" -> 87)

    liveScores.update("Charlie", 92)  // new key
    liveScores.update("Alice", 98)    // existing key
    liveScores -= "Bob"               // removal

    println(s"Mutable scores: $liveScores")
  }
}

集合转换

集合间转换

scala
/** Prints the result of converting between List, Array, Set, Map, String and
  * Range using the standard `toX` methods.
  */
object CollectionConversions {
  def main(args: Array[String]): Unit = {
    val sourceList = List(1, 2, 3, 4, 5, 2, 3)
    val sourceArray = Array(1, 2, 3, 4, 5)
    val sourceSet = Set(1, 2, 3, 4, 5)
    val sourceMap = Map("a" -> 1, "b" -> 2, "c" -> 3)

    // From List (toSet drops the repeated 2 and 3).
    Seq(
      "From List:",
      s"List to Vector: ${sourceList.toVector}",
      s"List to Set: ${sourceList.toSet}",
      s"List to Array: ${sourceList.toArray.mkString(", ")}"
    ).foreach(println)

    // From Array.
    Seq(
      "\nFrom Array:",
      s"Array to List: ${sourceArray.toList}",
      s"Array to Vector: ${sourceArray.toVector}",
      s"Array to Set: ${sourceArray.toSet}"
    ).foreach(println)

    // From Set.
    Seq(
      "\nFrom Set:",
      s"Set to List: ${sourceSet.toList}",
      s"Set to Vector: ${sourceSet.toVector}",
      s"Set to Array: ${sourceSet.toArray.mkString(", ")}"
    ).foreach(println)

    // From Map.
    Seq(
      "\nFrom Map:",
      s"Map to List: ${sourceMap.toList}",
      s"Map keys to Set: ${sourceMap.keySet}",
      s"Map values to List: ${sourceMap.values.toList}"
    ).foreach(println)

    // From String: a string converts as a sequence of characters.
    val text = "hello"
    Seq(
      s"\nString to List: ${text.toList}",
      s"String to Vector: ${text.toVector}",
      s"String to Set: ${text.toSet}"
    ).foreach(println)

    // From Range.
    val oneToTen = 1 to 10
    Seq(
      s"\nRange to List: ${oneToTen.toList}",
      s"Range to Vector: ${oneToTen.toVector}",
      s"Range to Set: ${oneToTen.toSet}"
    ).foreach(println)
  }
}

集合性能比较

性能特性

scala
/** Rough, single-shot timing comparison of List, Vector and Array for
  * creation, indexed access, prepend, append and summation.
  *
  * Each operation is run exactly once with no JIT warm-up, so the figures are
  * only indicative; use a benchmarking harness for real measurements.
  */
object CollectionPerformance {
  def main(args: Array[String]): Unit = {
    val size = 100000

    /** Runs `operation` once, prints its elapsed time, and returns its result.
      *
      * The time is reported in fractional milliseconds. Whole-millisecond
      * integer division would truncate almost every operation measured here
      * to `0 ms`, making the comparison meaningless.
      *
      * @param name      label printed in front of the timing
      * @param operation by-name expression to measure
      * @return the value produced by `operation`
      */
    def timeOperation[T](name: String)(operation: => T): T = {
      val start = System.nanoTime()
      val result = operation
      val elapsedMs = (System.nanoTime() - start) / 1e6
      println(f"$name%20s: $elapsedMs%10.3f ms")
      result
    }

    println("Collection Performance Comparison:")
    println("=" * 50)

    // Creation.
    println("\nCreation Performance:")
    val list = timeOperation("List creation") { List.range(1, size) }
    val vector = timeOperation("Vector creation") { Vector.range(1, size) }
    val array = timeOperation("Array creation") { Array.range(1, size) }

    // Indexed access to the middle element.
    println("\nRandom Access Performance:")
    val index = size / 2
    timeOperation("List access") { list(index) }
    timeOperation("Vector access") { vector(index) }
    timeOperation("Array access") { array(index) }

    // Prepending one element.
    println("\nPrepend Performance:")
    timeOperation("List prepend") { 0 :: list }
    timeOperation("Vector prepend") { 0 +: vector }

    // Appending one element.
    println("\nAppend Performance:")
    timeOperation("List append") { list :+ (size + 1) }
    timeOperation("Vector append") { vector :+ (size + 1) }

    // Full traversal.
    println("\nIteration Performance:")
    timeOperation("List sum") { list.sum }
    timeOperation("Vector sum") { vector.sum }
    timeOperation("Array sum") { array.sum }

    // Rule-of-thumb guidance.
    println("\nMemory and Performance Guidelines:")
    println("List: Best for sequential access, prepending")
    println("Vector: Best for random access, general purpose")
    println("Array: Best for performance-critical code, interop with Java")
    println("Set: Best for membership testing, uniqueness")
    println("Map: Best for key-value lookups")
  }
}

实际应用示例

数据处理管道

scala
/** Example data-processing pipeline over a small list of people: per-city
  * aggregates, overall salary statistics, and an age-bracket breakdown.
  */
object DataProcessingPipeline {
  case class Person(name: String, age: Int, city: String, salary: Double)

  /** Aggregates for the people of one city.
    *
    * A typed record is used instead of a `Map[String, _]`: mixing the `Int`
    * count with `Double` averages in one map widens the value type to
    * `AnyVal`, which the `f` interpolator rejects for `%.2f`.
    */
  private final case class CityStats(
    count: Int,
    avgAge: Double,
    avgSalary: Double,
    totalSalary: Double
  )

  /** Computes the aggregates for a non-empty group of people. */
  private def summarize(cityPeople: List[Person]): CityStats = {
    val totalSalary = cityPeople.map(_.salary).sum
    CityStats(
      count = cityPeople.size,
      avgAge = cityPeople.map(_.age).sum.toDouble / cityPeople.size,
      avgSalary = totalSalary / cityPeople.size,
      totalSalary = totalSalary
    )
  }

  /** Median of an already-sorted, non-empty sequence of values. */
  private def median(sortedValues: Vector[Double]): Double = {
    require(sortedValues.nonEmpty, "median of an empty collection is undefined")
    val mid = sortedValues.length / 2
    if (sortedValues.length % 2 == 0) (sortedValues(mid - 1) + sortedValues(mid)) / 2
    else sortedValues(mid)
  }

  def main(args: Array[String]): Unit = {
    val people = List(
      Person("Alice", 25, "New York", 75000),
      Person("Bob", 30, "San Francisco", 95000),
      Person("Charlie", 35, "New York", 85000),
      Person("Diana", 28, "Boston", 70000),
      Person("Eve", 32, "San Francisco", 105000),
      Person("Frank", 29, "Boston", 68000)
    )

    println("Original data:")
    people.foreach(println)

    // Pipeline: filter by age, group by city, summarize each group.
    val analysis: Map[String, CityStats] = people
      .filter(_.age >= 28)
      .groupBy(_.city)
      .map { case (city, cityPeople) => city -> summarize(cityPeople) }

    println("\nAnalysis by city (age >= 28):")
    analysis.foreach { case (city, stats) =>
      println(s"$city:")
      println(s"  count: ${stats.count}")
      println(f"  avgAge: ${stats.avgAge}%.2f")
      println(f"  avgSalary: ${stats.avgSalary}%.2f")
      println(f"  totalSalary: ${stats.totalSalary}%.2f")
    }

    // Salary statistics over everyone.
    val salaries = people.map(_.salary)
    // Vector gives cheap indexed access for the median lookup.
    val medianSalary = median(salaries.sorted.toVector)

    println("\nSalary Statistics:")
    println(f"Average: ${salaries.sum / salaries.length}%.2f")
    println(f"Median: $medianSalary%.2f")
    println(f"Min: ${salaries.min}%.2f")
    println(f"Max: ${salaries.max}%.2f")

    // Age distribution by decade bracket.
    val ageGroups = people.groupBy { person =>
      person.age match {
        case age if age < 30 => "20s"
        case age if age < 40 => "30s"
        case _ => "40+"
      }
    }

    println("\nAge Distribution:")
    // `members` avoids shadowing the outer `people` list.
    ageGroups.foreach { case (group, members) =>
      println(s"$group: ${members.map(_.name).mkString(", ")}")
    }
  }
}

缓存系统

scala
import scala.collection.mutable

/** A small least-recently-used cache and a driver that exercises it. */
object CacheSystem {
  /** LRU cache backed by a `mutable.LinkedHashMap`.
    *
    * Recency is tracked by position: every read or write removes the key and
    * re-inserts it, so the first entry is always the least recently used.
    *
    * @param maxSize number of entries kept before the oldest one is evicted
    */
  class LRUCache[K, V](maxSize: Int) {
    private val entries = mutable.LinkedHashMap[K, V]()

    /** Returns the cached value, if any, and marks the key as most recently used. */
    def get(key: K): Option[V] =
      entries.remove(key).map { found =>
        entries.update(key, found)  // re-insert at the end
        found
      }

    /** Stores `value` under `key`, evicting the oldest entry on overflow. */
    def put(key: K, value: V): Unit = {
      entries -= key  // drop any previous position first
      entries.update(key, value)

      if (entries.size > maxSize) {
        entries.headOption.foreach { case (eldestKey, _) => entries -= eldestKey }
      }
    }

    def size: Int = entries.size
    def keys: Set[K] = entries.keySet.toSet

    override def toString: String = entries.toString()
  }

  def main(args: Array[String]): Unit = {
    val cache = new LRUCache[String, Int](3)

    // Fill the cache to capacity.
    List("a" -> 1, "b" -> 2, "c" -> 3).foreach { case (k, v) => cache.put(k, v) }
    println(s"After adding a, b, c: $cache")

    // Reading 'a' makes it the most recently used entry.
    println(s"Get 'a': ${cache.get("a")}")
    println(s"After accessing 'a': $cache")

    // Inserting a fourth key should evict the oldest entry, 'b'.
    cache.put("d", 4)
    println(s"After adding 'd': $cache")

    // Replay a request sequence, counting hits and filling the cache on misses.
    val requests = List("a", "b", "c", "d", "a", "e", "f", "a")
    val (hits, misses) = requests.foldLeft((0, 0)) { case ((hitCount, missCount), key) =>
      if (cache.get(key).isDefined) {
        (hitCount + 1, missCount)
      } else {
        cache.put(key, key.hashCode)
        (hitCount, missCount + 1)
      }
    }

    val hitRate = hits.toDouble / (hits + misses) * 100
    println("\nCache performance:")
    println(s"Hits: $hits, Misses: $misses")
    println(s"Hit rate: $hitRate%")
  }
}

最佳实践

  1. 选择合适的集合类型

    • 需要顺序访问:List
    • 需要随机访问:Vector
    • 需要唯一性:Set
    • 需要键值映射:Map
  2. 不可变 vs 可变

    • 默认使用不可变集合
    • 性能关键场景考虑可变集合
    • 函数式编程优先不可变
  3. 性能考虑

    • List 适合前置操作
    • Vector 适合随机访问
    • Array 适合性能关键场景
  4. 内存效率

    • 使用 view 进行惰性计算
    • 避免不必要的中间集合
    • 考虑使用 Iterator 处理大数据
  5. 函数式编程

    • 使用 map、filter、reduce 等高阶函数
    • 链式操作提高可读性
    • 避免副作用

Scala 集合框架提供了强大而灵活的数据处理能力,掌握这些集合类型和操作方法是编写高效 Scala 程序的关键。

本站内容仅供学习和研究使用。