Skip to content

Julia 字典和集合

字典(Dict)和集合(Set)是 Julia 中重要的数据结构,分别用于存储键值对和互不重复的元素。

字典(Dict)

创建字典

julia
# Build a Dict from explicit key => value pairs
d = Dict("a" => 1, "b" => 2, "c" => 3)
println(d)

# Empty dictionaries: untyped (Dict{Any, Any}) and with explicit key/value types
empty_dict = Dict()
empty_typed = Dict{String, Int}()

# Build from two parallel arrays by zipping keys with values
keys_arr = ["a", "b", "c"]
vals_arr = [1, 2, 3]
d = Dict(zip(keys_arr, vals_arr))

# Build from a generator expression
d = Dict(x => x^2 for x in 1:5)
println(d)  # holds 1=>1, 2=>4, 3=>9, 4=>16, 5=>25 (printed order is not guaranteed)

访问元素

julia
d = Dict("apple" => 5, "banana" => 3, "cherry" => 8)

# Look up by key (throws a KeyError when the key is missing)
println(d["apple"])  # 5

# get: look up with a fallback value
println(get(d, "apple", 0))    # 5
println(get(d, "orange", 0))   # 0 (default returned because the key is absent)

# get!: return the value, inserting the default first when the key is absent
println(get!(d, "orange", 10)) # 10 (inserted and returned)
println(d["orange"])           # 10

# Check whether a key exists
println(haskey(d, "apple"))    # true
println("apple" in keys(d))    # true

修改字典

julia
d = Dict{String, Int}()

# Add or update entries by assignment
d["a"] = 1
d["b"] = 2
println(d)

# Add several entries at once
merge!(d, Dict("c" => 3, "d" => 4))
println(d)

# Remove an entry by key
delete!(d, "a")
println(d)

# pop!: remove an entry and return its value
val = pop!(d, "b")
println(val)  # 2

# pop! with a default, returned when the key is absent
val = pop!(d, "x", -1)
println(val)  # -1

# Remove every entry
empty!(d)
println(isempty(d))  # true

遍历字典

julia
d = Dict("a" => 1, "b" => 2, "c" => 3)

# Iterate over key/value pairs, destructuring each pair
for (key, value) in d
    println("$key => $value")
end

# Iterate over keys only
for key in keys(d)
    println(key)
end

# Iterate over values only
for value in values(d)
    println(value)
end

# Iterate with pairs, keeping each entry as a single Pair
for pair in pairs(d)
    println(pair)  # "a" => 1, etc.
end

字典操作

julia
d1 = Dict("a" => 1, "b" => 2)
d2 = Dict("b" => 20, "c" => 3)

# Merge into a new dictionary; on a shared key the value from d2 wins
d3 = merge(d1, d2)
println(d3)  # Dict("a"=>1, "b"=>20, "c"=>3)

# Merge in place (d1 is modified)
merge!(d1, d2)

# Merge with a custom combiner for keys present in both dictionaries
d1 = Dict("a" => 1, "b" => 2)
d2 = Dict("b" => 20, "c" => 3)
d3 = merge(+, d1, d2)  # values of shared keys are added together
println(d3)  # Dict("a"=>1, "b"=>22, "c"=>3)

# Filter: the predicate receives each entry as a Pair
d = Dict("a" => 1, "b" => 2, "c" => 3, "d" => 4)
filtered = filter(p -> p.second > 2, d)
println(filtered)  # Dict("c"=>3, "d"=>4)

字典属性

julia
d = Dict("a" => 1, "b" => 2, "c" => 3)

# Number of entries
println(length(d))  # 3

# Keys and values
println(keys(d))    # iterable view of the keys
println(values(d))  # iterable view of the values

# Materialize keys / values as arrays
println(collect(keys(d)))
println(collect(values(d)))

# Array of entries: each element is a Pair, not a tuple
println(collect(d))  # e.g. ["a" => 1, "b" => 2, "c" => 3] (order is not guaranteed)

有序字典

标准 Dict 不保证遍历顺序;如需保持插入顺序,可使用 OrderedCollections.jl 包提供的 OrderedDict。

julia
using OrderedCollections

# Create an ordered dictionary
od = OrderedDict("a" => 1, "b" => 2, "c" => 3)

# Later insertions are kept in insertion order as well
od["d"] = 4
for (k, v) in od
    println("$k => $v")
end
# Entries are printed in insertion order
# 按插入顺序输出

默认字典

julia
using DataStructures

# Dictionary whose missing keys read as the default value 0
dd = DefaultDict{String, Int}(0)
dd["a"] += 1
dd["b"] += 1
dd["a"] += 1
println(dd["a"])  # 2
println(dd["c"])  # 0 (a key that was never set yields the default)

# Default is an empty array; the type Vector{Int} is passed as the default here
# NOTE(review): presumably DefaultDict calls it to build a fresh array per
# missing key - confirm against the DataStructures.jl documentation.
dd = DefaultDict{String, Vector{Int}}(Vector{Int})
push!(dd["a"], 1)
push!(dd["a"], 2)
push!(dd["b"], 3)
println(dd)

集合(Set)

创建集合

julia
# Build a Set from an array
s = Set([1, 2, 3, 4, 5])
println(s)

# Empty sets: untyped (Set{Any}) and with an explicit element type
empty_set = Set()
empty_typed = Set{Int}()

# Build from another iterable; a string yields its distinct characters
s = Set("hello")  # set of characters
println(s)  # Set(['h', 'e', 'l', 'o']) (printed order is not guaranteed)

# Build from a generator expression
s = Set(x^2 for x in 1:5)
println(s)  # Set([1, 4, 9, 16, 25])

集合操作

julia
s = Set([1, 2, 3])

# Add a single element
push!(s, 4)
println(s)  # Set([1, 2, 3, 4]) (printed order is not guaranteed)

# Add several elements at once
union!(s, [5, 6])
println(s)

# Remove a specific element
delete!(s, 1)
println(s)

# pop!: remove and return an arbitrary element
elem = pop!(s)
println("删除了: $elem")

# Membership tests
println(2 in s)   # true, unless pop! above happened to remove 2
println(2 ∈ s)    # same test with the Unicode operator (type \in<TAB>)
println(100 in s) # false
println(100 ∉ s)  # true; ∉ is the negated test (type \notin<TAB>)

集合运算

julia
a = Set([1, 2, 3, 4])
b = Set([3, 4, 5, 6])

# Union
println(union(a, b))       # Set([1, 2, 3, 4, 5, 6])
println(a ∪ b)             # same, Unicode operator (type \cup<TAB>)

# Intersection
println(intersect(a, b))   # Set([3, 4])
println(a ∩ b)             # same, Unicode operator (type \cap<TAB>)

# Difference: elements of a that are not in b
println(setdiff(a, b))     # Set([1, 2])

# Symmetric difference: in a or in b, but not in both
println(symdiff(a, b))     # Set([1, 2, 5, 6])

# Subset / superset checks
println(issubset([1, 2], a))  # true
println([1, 2] ⊆ a)           # true (type \subseteq<TAB>)
println(a ⊇ [1, 2])           # true, superset form (type \supseteq<TAB>)

# Equality ignores element order
println(Set([1, 2]) == Set([2, 1]))  # true

原地集合运算

julia
a = Set([1, 2, 3, 4])
b = Set([3, 4, 5, 6])

# In-place union: a receives every element of b
union!(a, b)
println(a)  # Set([1, 2, 3, 4, 5, 6])

# In-place intersection (a is reset first because union! modified it)
a = Set([1, 2, 3, 4])
intersect!(a, b)
println(a)  # Set([3, 4])

# In-place difference (a is reset again for the same reason)
a = Set([1, 2, 3, 4])
setdiff!(a, b)
println(a)  # Set([1, 2])

集合属性

julia
s = Set([1, 2, 3, 4, 5])

# Number of elements
println(length(s))  # 5

# Emptiness check
println(isempty(s))  # false

# Convert to an array (element order is not guaranteed)
arr = collect(s)
println(arr)

# Sorted array of the elements
sorted = sort(collect(s))
println(sorted)

实用示例

统计词频

julia
"""
    word_frequency(text)

Count how often each whitespace-separated word occurs in `text`.

The text is lower-cased before splitting, so counting is case-insensitive.
Returns a `Dict{String, Int}` mapping each word to its number of occurrences.
"""
function word_frequency(text)
    tally = Dict{String, Int}()
    foreach(split(lowercase(text))) do token
        # A word not seen before starts from 0
        tally[token] = 1 + get(tally, token, 0)
    end
    return tally
end

text = "the quick brown fox jumps over the lazy dog the fox"
freq = word_frequency(text)

# Sort the (word => count) pairs by count, highest first
sorted_freq = sort(collect(freq), by=x->x[2], rev=true)
for (word, count) in sorted_freq
    println("$word: $count")
end

去重保持顺序

julia
"""
    unique_ordered(arr)

Return a new vector with the distinct elements of `arr`, in order of first
occurrence.

A `Set` records which elements have already been emitted, so each membership
check is a hash lookup rather than a scan of the result. `Base.unique` offers
the same behaviour; this version shows the underlying technique.
"""
function unique_ordered(arr)
    seen = Set{eltype(arr)}()
    result = eltype(arr)[]

    for x in arr
        # Keep only the first occurrence of each element
        if x ∉ seen
            push!(seen, x)
            push!(result, x)
        end
    end

    return result
end

# Duplicates are dropped; each value stays at the position of its first occurrence
arr = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
println(unique_ordered(arr))  # [3, 1, 4, 5, 9, 2, 6]

分组数据

julia
"""
    group_by(arr, key_func)

Partition the elements of `arr` into groups keyed by `key_func(element)`.

Returns a `Dict{Any, Vector}` in which each key maps to the elements that
produced it, in their original relative order.
"""
function group_by(arr, key_func)
    buckets = Dict{Any, Vector}()
    for element in arr
        # Fetch the bucket for this key, creating an empty one on first sight
        bucket = get!(buckets, key_func(element)) do
            []
        end
        push!(bucket, element)
    end
    return buckets
end

# Group by parity; the group keys are the strings returned by the key function
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
grouped = group_by(numbers, x -> x % 2 == 0 ? "偶数" : "奇数")
println(grouped)

# Group by first letter; the group keys are Char values
words = ["apple", "banana", "avocado", "blueberry", "cherry"]
grouped = group_by(words, w -> w[1])
println(grouped)

双向映射

julia
"""
    BiDict{K, V}

Bidirectional map between keys of type `K` and values of type `V`.

`forward` maps key to value and `backward` maps value to key. The two
dictionaries are kept as exact inverses of each other, so the mapping is
one-to-one.
"""
struct BiDict{K, V}
    forward::Dict{K, V}
    backward::Dict{V, K}
end

"""
    BiDict{K, V}()

Create an empty `BiDict` with key type `K` and value type `V`.
"""
function BiDict{K, V}() where {K, V}
    return BiDict(Dict{K, V}(), Dict{V, K}())
end

"""
    setindex!(bd::BiDict, value, key)

Associate `key` with `value` in both directions (`bd[key] = value`).

Any previous pairing involving `key` or `value` is removed first; otherwise a
reassignment would leave a stale entry in the opposite dictionary and the two
directions would disagree. Returns `bd`, following the `Base.setindex!`
convention.
"""
function Base.setindex!(bd::BiDict, value, key)
    # `key` is being re-pointed: forget the value it used to map to
    if haskey(bd.forward, key)
        delete!(bd.backward, bd.forward[key])
    end
    # `value` is being taken over: forget the key that used to own it
    if haskey(bd.backward, value)
        delete!(bd.forward, bd.backward[value])
    end
    bd.forward[key] = value
    bd.backward[value] = key
    return bd
end

"""
    get_value(bd::BiDict, key)

Return the value associated with `key`; throws a `KeyError` if `key` is absent.
"""
function get_value(bd::BiDict, key)
    return bd.forward[key]
end

"""
    get_key(bd::BiDict, value)

Return the key associated with `value`; throws a `KeyError` if `value` is absent.
"""
function get_key(bd::BiDict, value)
    return bd.backward[value]
end

# Usage: assignments go through the Base.setindex! method defined for BiDict
bd = BiDict{String, Int}()
bd["one"] = 1
bd["two"] = 2

println(get_value(bd, "one"))  # 1
println(get_key(bd, 2))        # "two"

缓存/记忆化

julia
"""
    memoize(f)

Wrap `f` in a function that caches its results.

The returned function accepts positional arguments only. The argument tuple is
the cache key, so `f` is invoked at most once per distinct argument tuple and
later calls return the stored result.
"""
function memoize(f)
    store = Dict()

    function memoized(args...)
        # Cache hit: reuse the stored result
        haskey(store, args) && return store[args]
        # Cache miss: compute once and remember the result
        result = f(args...)
        store[args] = result
        return result
    end

    return memoized
end

# Usage
# Plain recursive Fibonacci, shown for comparison only (it is not called below)
slow_fib(n) = n <= 2 ? 1 : slow_fib(n-1) + slow_fib(n-2)

# The recursive calls go through `fast_fib` itself, so they hit the cache too
fast_fib = memoize(function(n)
    n <= 2 ? 1 : fast_fib(n-1) + fast_fib(n-2)
end)

@time println(fast_fib(40))  # fast

性能提示

julia
# 1. Give the dictionary concrete key/value types
d = Dict{String, Int}()  # more efficient than an untyped Dict()

# 2. Pre-allocate with sizehint!
d = Dict{String, Int}()
sizehint!(d, 1000)  # about 1000 entries are expected

# 3. Avoid building string keys over and over
# Not ideal: a new key string is interpolated on every iteration
for i in 1:1000
    d["key_$i"] = i
end

# Better: build the key strings once and reuse them
# NOTE(review): this only pays off when the same keys are used more than once;
# for a single pass like this one the same 1000 strings are still created.
keys_arr = ["key_$i" for i in 1:1000]
for i in 1:1000
    d[keys_arr[i]] = i
end

下一步

学习完字典和集合后,请继续学习:

本站内容仅供学习和研究使用。