Go 语言正则表达式
正则表达式是处理文本模式匹配的强大工具。Go 语言通过标准库的 regexp 包提供正则表达式支持:它采用 RE2 语法,保证匹配时间与输入长度呈线性关系,但不支持前瞻/后顾断言(如 `(?=...)`)和反向引用。
📋 正则表达式基础
基本匹配和查找
go
package main
import (
"fmt"
"regexp"
)
// basicMatching demonstrates one-shot matching, matching with a compiled
// pattern, and first/all match lookups. Every result is printed to standard
// output.
func basicMatching() {
	fmt.Println("=== 基本正则匹配 ===")

	// One-shot match. The error is ignored because the pattern is a fixed
	// literal that always compiles.
	const (
		literal = "Go"
		sample  = "Go is a programming language"
	)
	found, _ := regexp.MatchString(literal, sample)
	fmt.Printf("'%s' 匹配 '%s': %v\n", literal, sample, found)

	// Compiled pattern: the standalone word "Go" or "go".
	wordRe := regexp.MustCompile(`\b[Gg]o\b`)
	candidates := []string{
		"Go is great",
		"I love go programming",
		"going somewhere",
		"Let's go!",
	}
	for _, candidate := range candidates {
		if !wordRe.MatchString(candidate) {
			fmt.Printf("❌ '%s'\n", candidate)
			continue
		}
		fmt.Printf("✅ '%s'\n", candidate)
	}

	// First match versus every match (-1 means no limit).
	digits := regexp.MustCompile(`\d+`)
	const fruit = "我有 3 个苹果和 15 个橙子"
	fmt.Printf("第一个数字: %s\n", digits.FindString(fruit))
	fmt.Printf("所有数字: %v\n", digits.FindAllString(fruit, -1))
}
func main() {
basicMatching()
}捕获组和替换
go
package main
import (
"fmt"
"regexp"
)
// captureAndReplace demonstrates numbered capture groups, named capture
// groups, and regexp-based replacement. Every result is printed to standard
// output.
func captureAndReplace() {
	fmt.Println("=== 捕获组和替换 ===")

	// Numbered groups: year, month and day of a YYYY-MM-DD date.
	isoDate := regexp.MustCompile(`(\d{4})-(\d{2})-(\d{2})`)
	if parts := isoDate.FindStringSubmatch("今天是 2023-12-25"); parts != nil {
		fmt.Printf("完整匹配: %s\n", parts[0])
		fmt.Printf("年: %s, 月: %s, 日: %s\n", parts[1], parts[2], parts[3])
	}

	// Named groups: SubexpNames is index-aligned with the submatch slice, and
	// index 0 (the whole match) has no name.
	namedDate := regexp.MustCompile(`(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})`)
	if parts := namedDate.FindStringSubmatch("生日: 1990-05-15"); parts != nil {
		groupNames := namedDate.SubexpNames()
		for idx := 1; idx < len(groupNames); idx++ {
			if groupNames[idx] == "" {
				continue
			}
			fmt.Printf("%s: %s\n", groupNames[idx], parts[idx])
		}
	}

	fmt.Println("\n字符串替换:")

	// Plain replacement of a whole word.
	wholeCat := regexp.MustCompile(`\bcat\b`)
	const sentence = "The cat sat on the mat. Another cat was nearby."
	swapped := wholeCat.ReplaceAllString(sentence, "dog")
	fmt.Printf("原文: %s\n", sentence)
	fmt.Printf("替换后: %s\n", swapped)

	// Replacement that reorders the captured groups: YYYY-MM-DD -> MM/DD/YYYY.
	const twoDates = "日期: 2023-12-25 和 2023-01-01"
	reordered := isoDate.ReplaceAllString(twoDates, "$2/$3/$1")
	fmt.Printf("原格式: %s\n", twoDates)
	fmt.Printf("新格式: %s\n", reordered)
}
func main() {
captureAndReplace()
}🎯 数据验证
常用验证模式
go
package main
import (
"fmt"
"regexp"
)
// Validator checks strings against named sets of regular expressions.
type Validator struct {
	// patterns maps a data type name to the expressions a value must satisfy.
	// A value is valid only when every expression in the list matches it.
	patterns map[string][]*regexp.Regexp
}

// compilePatterns compiles each expression with regexp.MustCompile, so it
// panics if any expression is invalid.
func compilePatterns(exprs ...string) []*regexp.Regexp {
	compiled := make([]*regexp.Regexp, 0, len(exprs))
	for _, expr := range exprs {
		compiled = append(compiled, regexp.MustCompile(expr))
	}
	return compiled
}

// NewValidator returns a Validator that knows the data types "email",
// "phone", "password", "url" and "ipv4".
func NewValidator() *Validator {
	return &Validator{
		patterns: map[string][]*regexp.Regexp{
			"email": compilePatterns(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`),
			// Mainland China mobile number.
			"phone": compilePatterns(`^1[3-9]\d{9}$`),
			// Go's regexp package (RE2 syntax) has no lookahead, so the usual
			// single-pattern form built from (?=...) groups fails to compile.
			// The same rule is expressed as several patterns that must all match.
			"password": compilePatterns(
				`^[A-Za-z\d@$!%*?&]{8,}$`, // allowed characters only, at least 8 of them
				`[a-z]`,                   // at least one lowercase letter
				`[A-Z]`,                   // at least one uppercase letter
				`\d`,                      // at least one digit
				`[@$!%*?&]`,               // at least one special character
			),
			"url":  compilePatterns(`^https?://[^\s/$.?#].[^\s]*$`),
			"ipv4": compilePatterns(`^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`),
		},
	}
}

// Validate reports whether value satisfies every pattern registered for
// dataType. It returns false when dataType is unknown.
func (v *Validator) Validate(dataType, value string) bool {
	rules, exists := v.patterns[dataType]
	if !exists {
		return false
	}
	for _, rule := range rules {
		if !rule.MatchString(value) {
			return false
		}
	}
	return true
}
// validationDemo runs sample values for several data types through a
// Validator and prints a pass/fail mark for each value.
func validationDemo() {
	fmt.Println("=== 数据验证演示 ===")
	validator := NewValidator()

	// A slice is used instead of a map because map iteration order is
	// unspecified, which would shuffle the output sections from run to run.
	testData := []struct {
		dataType string
		values   []string
	}{
		{"email", []string{
			"user@example.com",  // expected: valid
			"invalid-email",     // expected: invalid
			"test@domain.co.uk", // expected: valid
		}},
		{"phone", []string{
			"13812345678", // expected: valid
			"1234567890",  // expected: invalid
			"15987654321", // expected: valid
		}},
		{"password", []string{
			"Password123!",     // expected: valid
			"password",         // expected: invalid
			"ComplexPass@2023", // expected: valid
		}},
		{"url", []string{
			"https://www.example.com", // expected: valid
			"invalid-url",             // expected: invalid
			"http://api.service.org",  // expected: valid
		}},
	}

	for _, group := range testData {
		fmt.Printf("\n%s 验证:\n", group.dataType)
		for _, value := range group.values {
			status := "❌"
			if validator.Validate(group.dataType, value) {
				status = "✅"
			}
			fmt.Printf(" %s '%s'\n", status, value)
		}
	}
}
func main() {
validationDemo()
}🔧 文本处理
文本清理和处理
go
package main
import (
"fmt"
"regexp"
"strings"
)
// TextProcessor bundles the precompiled patterns used to clean text, extract
// data from it, and mask sensitive values. Create one with NewTextProcessor.
type TextProcessor struct {
	htmlTags   *regexp.Regexp // everything from '<' to the next '>'
	whitespace *regexp.Regexp // runs of one or more whitespace characters
	numbers    *regexp.Regexp // runs of one or more digits
	emails     *regexp.Regexp // email-like tokens, unanchored
	phones     *regexp.Regexp // 11-digit mobile numbers starting with 13-19, unanchored
}

// NewTextProcessor compiles every pattern once, so the methods never pay for
// regexp compilation per call.
func NewTextProcessor() *TextProcessor {
	return &TextProcessor{
		htmlTags:   regexp.MustCompile(`<[^>]*>`),
		whitespace: regexp.MustCompile(`\s+`),
		numbers:    regexp.MustCompile(`\d+`),
		emails:     regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`),
		phones:     regexp.MustCompile(`1[3-9]\d{9}`),
	}
}

// RemoveHTMLTags returns text with every <...> span deleted.
func (tp *TextProcessor) RemoveHTMLTags(text string) string {
	return tp.htmlTags.ReplaceAllString(text, "")
}

// NormalizeWhitespace collapses each run of whitespace into a single space
// and trims leading and trailing whitespace.
func (tp *TextProcessor) NormalizeWhitespace(text string) string {
	return strings.TrimSpace(tp.whitespace.ReplaceAllString(text, " "))
}

// ExtractNumbers returns every run of digits in text, in order of appearance.
// A decimal such as "299.99" yields two entries, "299" and "99".
func (tp *TextProcessor) ExtractNumbers(text string) []string {
	return tp.numbers.FindAllString(text, -1)
}

// ExtractEmails returns every email-like token in text, in order of appearance.
func (tp *TextProcessor) ExtractEmails(text string) []string {
	return tp.emails.FindAllString(text, -1)
}

// MaskSensitiveData returns text with email addresses and mobile numbers
// partially hidden. An email keeps the first two characters of its user part
// and its domain; when the user part has two characters or fewer, the whole
// address becomes "***@***". A mobile number keeps its first three and last
// four digits.
func (tp *TextProcessor) MaskSensitiveData(text string) string {
	// Mask emails first.
	emailMasked := tp.emails.ReplaceAllStringFunc(text, func(email string) string {
		parts := strings.Split(email, "@")
		if len(parts) == 2 {
			user, domain := parts[0], parts[1]
			if len(user) > 2 {
				return user[:2] + "***@" + domain
			}
		}
		return "***@***"
	})

	// Then mask mobile numbers. The pattern matches exactly 11 ASCII digits,
	// so the byte offsets 3 and 7 are always in range.
	return tp.phones.ReplaceAllStringFunc(emailMasked, func(phone string) string {
		return phone[:3] + "****" + phone[7:]
	})
}
// textProcessingDemo walks through HTML cleanup, data extraction and
// sensitive-data masking with a TextProcessor, printing each step.
func textProcessingDemo() {
	fmt.Println("=== 文本处理演示 ===")
	tp := NewTextProcessor()

	// HTML cleanup: strip the tags first, then collapse the leftover whitespace.
	const page = `<p>联系我们:<strong>邮箱</strong> admin@company.com</p>
<div>电话:<span>13812345678</span></div>`
	fmt.Printf("原文: %s\n", page)
	stripped := tp.RemoveHTMLTags(page)
	fmt.Printf("移除HTML: %s\n", stripped)
	fmt.Printf("规范空白: %s\n", tp.NormalizeWhitespace(stripped))

	// Data extraction.
	fmt.Println("\n数据提取:")
	const contact = "联系方式: john@example.com, admin@company.org, 电话: 13812345678, 价格: 299.99"
	foundEmails := tp.ExtractEmails(contact)
	foundNumbers := tp.ExtractNumbers(contact)
	fmt.Printf("原文: %s\n", contact)
	fmt.Printf("邮箱: %v\n", foundEmails)
	fmt.Printf("数字: %v\n", foundNumbers)

	// Sensitive-data masking.
	fmt.Println("\n敏感数据脱敏:")
	const private = "用户邮箱: alice@example.com, 手机: 13987654321"
	fmt.Printf("原文: %s\n", private)
	fmt.Printf("脱敏后: %s\n", tp.MaskSensitiveData(private))
}
func main() {
textProcessingDemo()
}🎯 实际应用示例
日志解析器
go
package main
import (
"fmt"
"regexp"
"strconv"
"strings"
"time"
)
// LogEntry is one parsed log line, from either the access log or the error log.
type LogEntry struct {
	Timestamp time.Time // time parsed from the bracketed field of the line
	Level     string    // "ACCESS" for access-log lines, otherwise the level word of the line
	IP        string    // client address; empty when an error-log message contains none
	Message   string    // quoted request (access log) or message text (error log)
	Status    int       // status code of the line; ParseErrorLog always sets 500
}

// LogParser holds the precompiled patterns for the two supported log formats.
type LogParser struct {
	accessLogRe *regexp.Regexp
	errorLogRe  *regexp.Regexp
	ipRe        *regexp.Regexp
}

// NewLogParser compiles the access-log, error-log and IP address patterns.
func NewLogParser() *LogParser {
	return &LogParser{
		// Simplified access log format: IP [time] "request" status
		accessLogRe: regexp.MustCompile(`^(\S+) \[([^\]]+)\] "([^"]*)" (\d+)`),
		// Error log format: [time] LEVEL: message
		errorLogRe: regexp.MustCompile(`^\[([^\]]+)\] (\w+): (.+)`),
		// Dotted-quad address; the octets are not range-checked.
		ipRe: regexp.MustCompile(`\b(?:\d{1,3}\.){3}\d{1,3}\b`),
	}
}

// ParseAccessLog parses one access-log line of the form
//
//	IP [02/Jan/2006:15:04:05] "request" status
//
// It returns an error when the line does not match that shape, when the
// timestamp does not follow the layout, or when the status code does not fit
// in an int.
func (lp *LogParser) ParseAccessLog(line string) (*LogEntry, error) {
	matches := lp.accessLogRe.FindStringSubmatch(line)
	if len(matches) < 5 {
		return nil, fmt.Errorf("无法解析访问日志")
	}

	timestamp, err := time.Parse("02/Jan/2006:15:04:05", matches[2])
	if err != nil {
		return nil, fmt.Errorf("无法解析访问日志时间 %q: %w", matches[2], err)
	}
	// The pattern guarantees digits only, so Atoi can fail only on overflow.
	status, err := strconv.Atoi(matches[4])
	if err != nil {
		return nil, fmt.Errorf("无法解析访问日志状态码 %q: %w", matches[4], err)
	}

	return &LogEntry{
		Timestamp: timestamp,
		Level:     "ACCESS",
		IP:        matches[1],
		Message:   matches[3],
		Status:    status,
	}, nil
}

// ParseErrorLog parses one error-log line of the form
//
//	[2006-01-02 15:04:05] LEVEL: message
//
// The first IP address found in the message, if any, becomes the entry's IP.
// It returns an error when the line does not match that shape or when the
// timestamp does not follow the layout.
func (lp *LogParser) ParseErrorLog(line string) (*LogEntry, error) {
	matches := lp.errorLogRe.FindStringSubmatch(line)
	if len(matches) < 4 {
		return nil, fmt.Errorf("无法解析错误日志")
	}

	timestamp, err := time.Parse("2006-01-02 15:04:05", matches[1])
	if err != nil {
		return nil, fmt.Errorf("无法解析错误日志时间 %q: %w", matches[1], err)
	}

	// FindString returns "" when the message contains no address.
	ip := lp.ipRe.FindString(matches[3])

	return &LogEntry{
		Timestamp: timestamp,
		Level:     matches[2],
		IP:        ip,
		Message:   matches[3],
		// NOTE(review): every error-log entry gets 500 regardless of level, so
		// AnalyzeLogs also counts WARN lines as errors. Confirm this is intended.
		Status: 500,
	}, nil
}

// AnalyzeLogs aggregates entries into a statistics map with these keys:
//
//	"total"           int             number of entries
//	"levels"          map[string]int  entry count per Level
//	"status_codes"    map[string]int  entry count per Status, keyed by its decimal string
//	"unique_ips"      map[string]bool set of non-empty IPs
//	"unique_ip_count" int             size of that set
//	"errors"          int             entries with Status >= 400 or Level "ERROR" (any case)
func (lp *LogParser) AnalyzeLogs(entries []LogEntry) map[string]interface{} {
	levels := make(map[string]int)
	statusCodes := make(map[string]int)
	uniqueIPs := make(map[string]bool)
	errors := 0

	for _, entry := range entries {
		levels[entry.Level]++
		statusCodes[strconv.Itoa(entry.Status)]++
		if entry.IP != "" {
			uniqueIPs[entry.IP] = true
		}
		if entry.Status >= 400 || strings.EqualFold(entry.Level, "ERROR") {
			errors++
		}
	}

	return map[string]interface{}{
		"total":           len(entries),
		"levels":          levels,
		"status_codes":    statusCodes,
		"unique_ips":      uniqueIPs,
		"errors":          errors,
		"unique_ip_count": len(uniqueIPs),
	}
}
// logParsingDemo parses a handful of sample access-log and error-log lines,
// prints each parsed entry (or the reason a line was rejected), and then
// prints aggregate statistics over the parsed entries.
func logParsingDemo() {
	fmt.Println("=== 日志解析演示 ===")
	parser := NewLogParser()

	// Sample log data.
	accessLogs := []string{
		`192.168.1.100 [25/Dec/2023:10:00:00] "GET /index.html" 200`,
		`192.168.1.101 [25/Dec/2023:10:01:00] "POST /api/login" 200`,
		`192.168.1.102 [25/Dec/2023:10:02:00] "GET /admin" 403`,
		`192.168.1.100 [25/Dec/2023:10:03:00] "GET /notfound" 404`,
	}
	errorLogs := []string{
		`[2023-12-25 10:05:00] ERROR: Database connection failed from 192.168.1.103`,
		`[2023-12-25 10:06:00] WARN: High memory usage detected`,
		`[2023-12-25 10:07:00] ERROR: Auth failed for 192.168.1.104`,
	}
	var allEntries []LogEntry

	// Parse the access log.
	fmt.Println("解析访问日志:")
	for _, line := range accessLogs {
		entry, err := parser.ParseAccessLog(line)
		if err != nil {
			// Report rejected lines instead of dropping them silently.
			fmt.Printf("❌ %v: %s\n", err, line)
			continue
		}
		allEntries = append(allEntries, *entry)
		fmt.Printf("✅ %s [%d] %s from %s\n",
			entry.Level, entry.Status, entry.Message, entry.IP)
	}

	// Parse the error log.
	fmt.Println("\n解析错误日志:")
	for _, line := range errorLogs {
		entry, err := parser.ParseErrorLog(line)
		if err != nil {
			fmt.Printf("❌ %v: %s\n", err, line)
			continue
		}
		allEntries = append(allEntries, *entry)
		fmt.Printf("✅ %s: %s\n", entry.Level, entry.Message)
	}

	// Aggregate statistics.
	fmt.Println("\n=== 统计分析 ===")
	stats := parser.AnalyzeLogs(allEntries)
	fmt.Printf("总日志数: %v\n", stats["total"])
	fmt.Printf("错误数: %v\n", stats["errors"])
	fmt.Printf("唯一IP数: %v\n", stats["unique_ip_count"])

	// Map iteration order is unspecified, so the order of the lines in the
	// two distributions below can differ between runs.
	fmt.Println("\n级别分布:")
	levels := stats["levels"].(map[string]int)
	for level, count := range levels {
		fmt.Printf(" %s: %d\n", level, count)
	}
	fmt.Println("\n状态码分布:")
	statusCodes := stats["status_codes"].(map[string]int)
	for code, count := range statusCodes {
		fmt.Printf(" %s: %d\n", code, count)
	}
}
func main() {
logParsingDemo()
}🎓 小结
本章我们学习了 Go 语言的正则表达式:
- ✅ 基础匹配:模式编译、字符串匹配、查找操作
- ✅ 捕获组:捕获组、命名组、模式替换
- ✅ 数据验证:邮箱、手机号、密码等常用验证
- ✅ 文本处理:HTML清理、数据提取、敏感信息脱敏
- ✅ 实际应用:日志解析器实现
正则表达式是处理文本数据的强大工具,在数据验证、文本解析、日志分析等场景中广泛应用。
接下来,我们将学习 Go 类型断言,深入了解接口类型的动态检查机制。
正则表达式建议
- 优先使用编译后的正则表达式提高性能
- 合理使用捕获组,避免不必要的复杂度
- 注意正则表达式的性能:Go 的 regexp 基于 RE2,匹配时间与输入长度成线性关系,不会出现灾难性回溯;但也因此不支持前瞻/后顾断言和反向引用,且模式越复杂、输入越长,开销仍然越大
- 对于复杂验证,考虑组合多个简单模式