Julia 字典和集合
字典(Dict)和集合(Set)是 Julia 中重要的数据结构,分别用于存储键值对和不重复的元素。
字典(Dict)
创建字典
julia
# 使用 Dict 构造函数
d = Dict("a" => 1, "b" => 2, "c" => 3)
println(d)
# 空字典
empty_dict = Dict()
empty_typed = Dict{String, Int}()
# 从数组创建
keys_arr = ["a", "b", "c"]
vals_arr = [1, 2, 3]
d = Dict(zip(keys_arr, vals_arr))
# 使用推导式
d = Dict(x => x^2 for x in 1:5)
println(d) # Dict(1=>1, 2=>4, 3=>9, 4=>16, 5=>25)
访问元素
julia
d = Dict("apple" => 5, "banana" => 3, "cherry" => 8)
# 使用键访问
println(d["apple"]) # 5
# 使用 get(带默认值)
println(get(d, "apple", 0)) # 5
println(get(d, "orange", 0)) # 0(不存在时返回默认值)
# get! - 获取或插入默认值
println(get!(d, "orange", 10)) # 10(插入并返回)
println(d["orange"]) # 10
# 检查键是否存在
println(haskey(d, "apple")) # true
println("apple" in keys(d)) # true
修改字典
julia
d = Dict{String, Int}()
# 添加/更新元素
d["a"] = 1
d["b"] = 2
println(d)
# 批量添加
merge!(d, Dict("c" => 3, "d" => 4))
println(d)
# 删除元素
delete!(d, "a")
println(d)
# pop! - 删除并返回值
val = pop!(d, "b")
println(val) # 2
# pop! 带默认值
val = pop!(d, "x", -1)
println(val) # -1
# 清空字典
empty!(d)
println(isempty(d)) # true
遍历字典
julia
d = Dict("a" => 1, "b" => 2, "c" => 3)
# 遍历键值对
for (key, value) in d
println("$key => $value")
end
# 只遍历键
for key in keys(d)
println(key)
end
# 只遍历值
for value in values(d)
println(value)
end
# 使用 pairs
for pair in pairs(d)
println(pair) # "a" => 1 等
end
字典操作
julia
d1 = Dict("a" => 1, "b" => 2)
d2 = Dict("b" => 20, "c" => 3)
# 合并(创建新字典)
d3 = merge(d1, d2)
println(d3) # Dict("a"=>1, "b"=>20, "c"=>3)
# 合并(原地修改)
merge!(d1, d2)
# 合并时自定义冲突处理
d1 = Dict("a" => 1, "b" => 2)
d2 = Dict("b" => 20, "c" => 3)
d3 = merge(+, d1, d2) # 相同键的值相加
println(d3) # Dict("a"=>1, "b"=>22, "c"=>3)
# 过滤
d = Dict("a" => 1, "b" => 2, "c" => 3, "d" => 4)
filtered = filter(p -> p.second > 2, d)
println(filtered) # Dict("c"=>3, "d"=>4)
字典属性
julia
d = Dict("a" => 1, "b" => 2, "c" => 3)
# 长度
println(length(d)) # 3
# 键和值
println(keys(d)) # 键的集合
println(values(d)) # 值的集合
# 转换为数组
println(collect(keys(d)))
println(collect(values(d)))
# 键值对数组
println(collect(d)) # ["a" => 1, "b" => 2, "c" => 3](Pair 数组,顺序不保证)
有序字典
标准 Dict 不保证顺序,如需有序可使用 OrderedDict:
julia
using OrderedCollections
# 创建有序字典
od = OrderedDict("a" => 1, "b" => 2, "c" => 3)
# 保持插入顺序
od["d"] = 4
for (k, v) in od
println("$k => $v")
end
# 按插入顺序输出
默认字典
julia
using DataStructures
# 创建带默认值的字典
dd = DefaultDict{String, Int}(0)
dd["a"] += 1
dd["b"] += 1
dd["a"] += 1
println(dd["a"]) # 2
println(dd["c"]) # 0(未设置的键返回默认值)
# 默认值为空数组
dd = DefaultDict{String, Vector{Int}}(Vector{Int})
push!(dd["a"], 1)
push!(dd["a"], 2)
push!(dd["b"], 3)
println(dd)
集合(Set)
创建集合
julia
# 使用 Set 构造函数
s = Set([1, 2, 3, 4, 5])
println(s)
# 空集合
empty_set = Set()
empty_typed = Set{Int}()
# 从其他集合创建
s = Set("hello") # 字符集合
println(s) # Set(['h', 'e', 'l', 'o'])
# 使用推导式
s = Set(x^2 for x in 1:5)
println(s) # Set([1, 4, 9, 16, 25])
集合操作
julia
s = Set([1, 2, 3])
# 添加元素
push!(s, 4)
println(s) # Set([1, 2, 3, 4])
# 添加多个元素
union!(s, [5, 6])
println(s)
# 删除元素
delete!(s, 1)
println(s)
# pop - 删除并返回任意元素
elem = pop!(s)
println("删除了: $elem")
# 检查成员
println(2 in s) # true(前提是上面的 pop! 没有恰好删除 2)
println(2 ∈ s) # true(Unicode)
println(100 in s) # false
println(100 ∉ s) # true
集合运算
julia
a = Set([1, 2, 3, 4])
b = Set([3, 4, 5, 6])
# 并集
println(union(a, b)) # Set([1, 2, 3, 4, 5, 6])
println(a ∪ b) # 同上(Unicode)
# 交集
println(intersect(a, b)) # Set([3, 4])
println(a ∩ b) # 同上(Unicode)
# 差集
println(setdiff(a, b)) # Set([1, 2])
# 对称差(在a或b中,但不同时在两者中)
println(symdiff(a, b)) # Set([1, 2, 5, 6])
# 子集检查
println(issubset([1, 2], a)) # true
println([1, 2] ⊆ a) # true
println(a ⊇ [1, 2]) # true(超集)
# 相等
println(Set([1, 2]) == Set([2, 1])) # true
原地集合运算
julia
a = Set([1, 2, 3, 4])
b = Set([3, 4, 5, 6])
# 原地并集
union!(a, b)
println(a) # Set([1, 2, 3, 4, 5, 6])
# 原地交集
a = Set([1, 2, 3, 4])
intersect!(a, b)
println(a) # Set([3, 4])
# 原地差集
a = Set([1, 2, 3, 4])
setdiff!(a, b)
println(a) # Set([1, 2])
集合属性
julia
s = Set([1, 2, 3, 4, 5])
# 长度
println(length(s)) # 5
# 是否为空
println(isempty(s)) # false
# 转换为数组
arr = collect(s)
println(arr)
# 排序后的数组
sorted = sort(collect(s))
println(sorted)
实用示例
统计词频
julia
"""
    word_frequency(text)

Count how often each whitespace-separated word occurs in `text`.

The text is lowercased before it is split, so counting is case-insensitive.
Returns a `Dict{String, Int}` mapping each word to its number of occurrences.
"""
function word_frequency(text)
    counts = Dict{String, Int}()
    for token in split(lowercase(text))
        if haskey(counts, token)
            counts[token] += 1
        else
            # First sighting of this word.
            counts[token] = 1
        end
    end
    return counts
end
text = "the quick brown fox jumps over the lazy dog the fox"
freq = word_frequency(text)
# 按频率排序
sorted_freq = sort(collect(freq), by=x->x[2], rev=true)
for (word, count) in sorted_freq
println("$word: $count")
end
去重保持顺序
julia
"""
    unique_ordered(arr)

Return a vector holding the distinct elements of `arr` in order of first appearance.

The result has element type `eltype(arr)`.
"""
function unique_ordered(arr)
    T = eltype(arr)
    kept = T[]
    visited = Set{T}()
    for item in arr
        # Skip anything that has already been emitted.
        item in visited && continue
        push!(visited, item)
        push!(kept, item)
    end
    return kept
end
arr = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
println(unique_ordered(arr)) # [3, 1, 4, 5, 9, 2, 6]
分组数据
julia
"""
    group_by(arr, key_func)

Group the elements of `arr` by the value `key_func` returns for each of them.

# Arguments
- `arr`: iterable collection of items to group.
- `key_func`: function called as `key_func(item)` to compute an item's group key.

# Returns
A `Dict{Any, Vector{T}}` with `T = eltype(arr)`, mapping each key to the items that
produced it, in their original order.
"""
function group_by(arr, key_func)
    T = eltype(arr)
    # Keys stay `Any` because the return type of `key_func` is not known here; the
    # groups themselves are concretely typed instead of the abstract `Vector` / `[]`.
    groups = Dict{Any, Vector{T}}()
    for item in arr
        # `get!` inserts a fresh empty vector the first time a key is seen.
        push!(get!(() -> T[], groups, key_func(item)), item)
    end
    return groups
end
# 按奇偶分组
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
grouped = group_by(numbers, x -> x % 2 == 0 ? "偶数" : "奇数")
println(grouped)
# 按首字母分组
words = ["apple", "banana", "avocado", "blueberry", "cherry"]
grouped = group_by(words, w -> w[1])
println(grouped)
双向映射
julia
"""
    BiDict{K, V}

Bidirectional mapping between keys of type `K` and values of type `V`.

`forward` maps each key to its value and `backward` maps each value back to its key.
"""
struct BiDict{K, V}
    forward::Dict{K, V}
    backward::Dict{V, K}
end

"""
    BiDict{K, V}()

Create an empty `BiDict` with key type `K` and value type `V`.
"""
function BiDict{K, V}() where {K, V}
    return BiDict{K, V}(Dict{K, V}(), Dict{V, K}())
end

"""
    setindex!(bd::BiDict, value, key)

Associate `key` with `value` in both directions (`bd[key] = value`).

Any pair that previously involved `key` or `value` is removed first, so `forward` and
`backward` always remain exact inverses of each other. Returns `bd`.
"""
function Base.setindex!(bd::BiDict, value, key)
    # Rebinding an existing key: its old value must no longer point back to it.
    if haskey(bd.forward, key)
        delete!(bd.backward, bd.forward[key])
    end
    # Reusing an existing value: the key that owned it must be dropped.
    if haskey(bd.backward, value)
        delete!(bd.forward, bd.backward[value])
    end
    bd.forward[key] = value
    bd.backward[value] = key
    return bd
end

"""
    get_value(bd::BiDict, key)

Return the value associated with `key`. Throws a `KeyError` if `key` is absent.
"""
function get_value(bd::BiDict, key)
    return bd.forward[key]
end

"""
    get_key(bd::BiDict, value)

Return the key associated with `value`. Throws a `KeyError` if `value` is absent.
"""
function get_key(bd::BiDict, value)
    return bd.backward[value]
end
# 使用
bd = BiDict{String, Int}()
bd["one"] = 1
bd["two"] = 2
println(get_value(bd, "one")) # 1
println(get_key(bd, 2)) # "two"
缓存/记忆化
julia
"""
    memoize(f)

Wrap `f` in a function that caches results by its positional arguments.

The returned function calls `f(args...)` only the first time it sees a given argument
tuple; later calls with equal arguments return the stored result.
"""
function memoize(f)
    results = Dict()
    function memoized(args...)
        # Cache hit: reuse the stored result without calling `f` again.
        haskey(results, args) && return results[args]
        value = f(args...)
        results[args] = value
        return value
    end
    return memoized
end
# 使用
slow_fib(n) = n <= 2 ? 1 : slow_fib(n-1) + slow_fib(n-2)
fast_fib = memoize(function(n)
n <= 2 ? 1 : fast_fib(n-1) + fast_fib(n-2)
end)
@time println(fast_fib(40)) # 快速
性能提示
julia
# 1. 指定类型
d = Dict{String, Int}() # 比 Dict() 更高效
# 2. 使用 sizehint! 预分配
d = Dict{String, Int}()
sizehint!(d, 1000) # 预计有 1000 个元素
# 3. 避免字符串键的频繁创建
# 不好
for i in 1:1000
d["key_$i"] = i
end
# 好
keys_arr = ["key_$i" for i in 1:1000]
for i in 1:1000
d[keys_arr[i]] = i
end
下一步
学习完字典和集合后,请继续学习: