正则表达式
概述
正则表达式是处理文本的强大工具,用于模式匹配、文本搜索、替换和验证。Kotlin提供了完整的正则表达式支持,基于Java的Pattern和Matcher类,同时提供了更简洁的Kotlin风格API。
正则表达式基础
创建正则表达式
kotlin
/**
 * Shows the ways a [Regex] can be constructed (constructor, `toRegex()`,
 * raw strings, option flags) and prints a few containment checks.
 */
fun main() {
    println("=== 正则表达式创建 ===")

    // 1. Plain constructor calls.
    val helloLiteral = Regex("hello")
    val digitRun = Regex("[0-9]+")
    val dashedPhone = Regex("\\d{3}-\\d{3}-\\d{4}") // phone-number layout

    // 2. The String.toRegex() extension.
    val worldLiteral = "world".toRegex()
    val letterRun = "[a-zA-Z]+".toRegex()

    // 3. Raw strings, so backslashes need no doubling.
    val isoDate = Regex("""^\d{4}-\d{2}-\d{2}$""") // date layout
    val emailShape = Regex("""[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}""") // e-mail

    // 4. Regexes built with option flags.
    val helloAnyCase = Regex("HELLO", RegexOption.IGNORE_CASE)
    val commentedPhone = Regex(
        """
        \d{3} # 区号
        - # 分隔符
        \d{3} # 前三位
        - # 分隔符
        \d{4} # 后四位
        """.trimIndent(),
        setOf(RegexOption.COMMENTS, RegexOption.IGNORE_CASE),
    )
    println("正则表达式创建完成")

    // Run a few of the patterns against one sample string.
    val sample = "Hello World 123"
    println("测试字符串: '$sample'")
    val checks = listOf(
        "包含'hello'(忽略大小写)" to helloAnyCase,
        "包含数字" to digitRun,
        "包含字母" to letterRun,
    )
    for ((label, regex) in checks) {
        println("$label: ${regex.containsMatchIn(sample)}")
    }
}
基本匹配操作
kotlin
/**
 * Walks through the basic matching API: containment checks, first match,
 * all matches, whole-string matching, and mapping matches to values.
 */
fun main() {
    println("=== 基本匹配操作 ===")
    val sample = "联系电话:138-1234-5678,邮箱:user@example.com,日期:2023-12-25"

    // 1. Does the text contain a match at all?
    val phonePattern = Regex("""\d{3}-\d{4}-\d{4}""")
    val emailPattern = Regex("""[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}""")
    val datePattern = Regex("""\d{4}-\d{2}-\d{2}""")
    println("文本: $sample")
    println("包含电话号码: ${phonePattern.containsMatchIn(sample)}")
    println("包含邮箱: ${emailPattern.containsMatchIn(sample)}")
    println("包含日期: ${datePattern.containsMatchIn(sample)}")

    // 2. First match only.
    val firstPhone = phonePattern.find(sample)
    if (firstPhone != null) {
        println("找到电话号码: ${firstPhone.value}")
        println("位置: ${firstPhone.range}")
    }
    val firstEmail = emailPattern.find(sample)
    if (firstEmail != null) {
        val span = firstEmail.range
        println("找到邮箱: ${firstEmail.value}")
        println("开始位置: ${span.first}")
        println("结束位置: ${span.last}")
    }

    // 3. Every match, in order of appearance.
    val digitRuns = Regex("""\d+""")
    println("所有数字:")
    for (hit in digitRuns.findAll(sample)) {
        println(" ${hit.value} at ${hit.range}")
    }

    // 4. Whole-string match.
    val candidate = "138-1234-5678"
    val strictPhone = Regex("""^\d{3}-\d{4}-\d{4}$""")
    println("'$candidate' 是完整的电话号码: ${strictPhone.matches(candidate)}")
    println("'138-1234' 是完整的电话号码: ${strictPhone.matches("138-1234")}")

    // 5. Convert the matches into integers.
    val extracted = digitRuns.findAll(sample).mapTo(mutableListOf()) { it.value.toInt() }
    println("提取的数字: $extracted")
}
正则表达式模式
字符类和量词
kotlin
/**
 * Prints, for a set of sample strings, what each character-class pattern
 * matches, then shows which inputs each quantifier accepts as a whole.
 */
fun main() {
    println("=== 字符类和量词 ===")
    val samples = listOf(
        "abc123",
        "ABC",
        "123",
        "hello@world.com",
        "test_file.txt",
        "special-chars!@#",
        " spaces ",
        "newline\nhere",
        "tab\there",
    )

    // Character-class patterns, listed in the order they are reported.
    val namedPatterns = listOf(
        "数字" to Regex("""\d+"""), // one or more digits
        "字母" to Regex("""[a-zA-Z]+"""), // one or more ASCII letters
        "字母数字" to Regex("""\w+"""), // one or more word characters
        "小写字母" to Regex("""[a-z]+"""), // one or more lowercase letters
        "大写字母" to Regex("""[A-Z]+"""), // one or more uppercase letters
        "特殊字符" to Regex("""[!@#$%^&*()]+"""), // listed punctuation
        "空白字符" to Regex("""\s+"""), // whitespace
        "非数字" to Regex("""\D+"""), // anything but digits
        "非单词字符" to Regex("""\W+"""), // anything but word characters
        "邮箱格式" to Regex("""\w+@\w+\.\w+"""), // simplistic e-mail shape
    )
    for (sample in samples) {
        println("测试字符串: '$sample'")
        for ((label, regex) in namedPatterns) {
            val hits = regex.findAll(sample).map { it.value }.toList()
            if (hits.isEmpty()) continue
            println(" $label: $hits")
        }
        println()
    }

    // Quantifiers: each pattern is anchored and tested against whole inputs.
    println("=== 量词示例 ===")
    val shortInputs = listOf("", "a", "aa", "aaa")
    val quantifierCases = listOf(
        "a?" to shortInputs, // zero or one 'a'
        "a*" to shortInputs, // zero or more 'a'
        "a+" to shortInputs, // one or more 'a'
        "a{2}" to listOf("a", "aa", "aaa"), // exactly two
        "a{2,}" to listOf("a", "aa", "aaa", "aaaa"), // two or more
        "a{1,3}" to listOf("", "a", "aa", "aaa", "aaaa"), // one to three
    )
    for ((quantified, inputs) in quantifierCases) {
        println("模式: $quantified")
        val anchored = Regex("^" + quantified + "$")
        for (input in inputs) {
            println(" '$input': ${anchored.matches(input)}")
        }
        println()
    }
}
分组和捕获
kotlin
/**
 * Demonstrates capture groups: positional groups, named groups, and splitting
 * phone numbers, e-mail addresses and a URL into their parts.
 */
fun main() {
    println("=== 分组和捕获 ===")

    // 1. Positional groups.
    val dateText = "今天是2023年12月25日,明天是2023年12月26日"
    val dateRegex = Regex("""(\d{4})年(\d{1,2})月(\d{1,2})日""")
    println("文本: $dateText")
    println("查找所有日期:")
    dateRegex.findAll(dateText).forEach { match ->
        val (year, month, day) = match.destructured
        println("完整匹配: ${match.value}")
        println("年份: $year")
        println("月份: $month")
        println("日期: $day")
        println("所有分组: ${match.groupValues}")
        println()
    }

    // 2. Named groups.
    val namedDateRegex = Regex("""(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日""")
    println("使用命名分组:")
    namedDateRegex.find(dateText)?.let { match ->
        println("年份: ${match.groups["year"]?.value}")
        println("月份: ${match.groups["month"]?.value}")
        println("日期: ${match.groups["day"]?.value}")
    }

    // 3. Phone-number parsing.
    // The number group accepts an optional "-dddd" tail. Without it,
    // "+86-138-1234-5678" was cut off after "1234" and reported as the
    // complete number "+86-138-1234".
    val phoneText = "联系方式:+86-138-1234-5678 或 021-12345678"
    val phoneRegex = Regex("""(?:(\+\d{1,3})-)?(\d{3,4})-(\d{4,8}(?:-\d{4})?)""")
    println("电话号码解析:")
    phoneRegex.findAll(phoneText).forEach { match ->
        // An unmatched optional group yields "" in groupValues.
        val countryCode = match.groupValues[1].takeIf { it.isNotEmpty() } ?: "无"
        val areaCode = match.groupValues[2]
        val number = match.groupValues[3]
        println("完整号码: ${match.value}")
        println("国家代码: $countryCode")
        println("区号: $areaCode")
        println("号码: $number")
        println()
    }

    // 4. E-mail parsing: user, domain and top-level domain.
    val emailText = "联系邮箱:john.doe@company.com 和 admin@test.org"
    val emailRegex = Regex("""([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+)\.([a-zA-Z]{2,})""")
    println("邮箱地址解析:")
    emailRegex.findAll(emailText).forEach { match ->
        val (user, domain, topLevel) = match.destructured
        println("完整邮箱: ${match.value}")
        println("用户名: $user")
        println("域名: $domain")
        println("顶级域名: $topLevel")
        println()
    }

    // 5. URL parsing: scheme, host, optional port, path, query and fragment.
    val urlText = "访问 https://www.example.com:8080/path/to/page?param=value#section"
    val urlRegex = Regex("""(https?)://([^:/]+)(?::(\d+))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?""")
    println("URL解析:")
    urlRegex.find(urlText)?.let { match ->
        println("完整URL: ${match.value}")
        println("协议: ${match.groupValues[1]}")
        println("主机: ${match.groupValues[2]}")
        println("端口: ${match.groupValues[3].takeIf { it.isNotEmpty() } ?: "默认"}")
        println("路径: ${match.groupValues[4]}")
        println("查询参数: ${match.groupValues[5]}")
        println("锚点: ${match.groupValues[6]}")
    }
}
文本替换和处理
基本替换操作
kotlin
/**
 * Demonstrates regex-based replacement: fixed text, group references,
 * a replacement lambda, phone-number formatting and whitespace cleanup.
 */
fun main() {
    println("=== 文本替换操作 ===")
    val profile = """
        用户信息:
        姓名:张三
        电话:138-1234-5678
        邮箱:zhangsan@example.com
        生日:1990-05-15
        地址:北京市朝阳区某某街道123号
    """.trimIndent()
    println("原始文本:")
    println(profile)
    println()

    // 1. Replace every match with fixed text.
    val dashedPhone = Regex("""\d{3}-\d{4}-\d{4}""")
    println("隐藏电话号码:")
    println(dashedPhone.replace(profile, "***-****-****"))
    println()

    // 2. Refer to capture groups from the replacement string.
    val isoDate = Regex("""(\d{4})-(\d{2})-(\d{2})""")
    println("格式化日期:")
    println(isoDate.replace(profile, "$1年$2月$3日"))
    println()

    // 3. Compute the replacement per match: keep two characters of the user part.
    val emailParts = Regex("""([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})""")
    val maskedProfile = emailParts.replace(profile) { hit ->
        val (user, domain) = hit.destructured
        val hiddenCount = (user.length - 2).coerceAtLeast(0)
        user.take(2) + "*".repeat(hiddenCount) + "@" + domain
    }
    println("掩码邮箱:")
    println(maskedProfile)
    println()

    // 4. Insert dashes into bare 11-digit numbers (an optional +86 prefix is dropped).
    val rawPhones = "电话号码:13812345678, 02112345678, +8613812345678"
    val bareElevenDigits = Regex("""(?:\+86)?(\d{3})(\d{4})(\d{4})""")
    val dashedPhones = bareElevenDigits.replace(rawPhones) { hit ->
        hit.groupValues.drop(1).joinToString("-")
    }
    println("格式化电话号码:")
    println("原文: $rawPhones")
    println("格式化后: $dashedPhones")
    println()

    // 5. Collapse whitespace runs to one space, then trim both ends.
    val messy = " Hello World! \n\n How are you? \t\t "
    val collapsed = Regex("""\s+""").replace(messy, " ")
    val tidy = collapsed.trim()
    println("文本清理:")
    println("原文: '$messy'")
    println("清理后: '$tidy'")
}
高级替换技巧
kotlin
/**
 * Demonstrates replacement lambdas for currency conversion, naive code
 * formatting, template substitution and masking of sensitive numbers.
 */
fun main() {
    println("=== 高级替换技巧 ===")

    // 1. Conditional replacement: convert each amount to CNY by currency symbol.
    val priceText = "商品价格:$100.50, ¥200.30, €150.75, £80.25"
    val convertedPrices = priceText.replace(Regex("""([¥€£$])(\d+\.?\d*)""")) { match ->
        val currency = match.groupValues[1]
        val amount = match.groupValues[2].toDouble()
        val convertedAmount = when (currency) {
            "$" -> amount * 7.2 // USD to CNY
            "€" -> amount * 7.8 // EUR to CNY
            "£" -> amount * 9.1 // GBP to CNY
            "¥" -> amount // already CNY
            else -> amount
        }
        "¥${"%.2f".format(convertedAmount)}"
    }
    println("货币转换:")
    println("原文: $priceText")
    println("转换后: $convertedPrices")
    println()

    // 2. Text formatting: a chain of replacements that spaces out a JavaScript snippet.
    val codeText = """
        function calculateSum(a,b){
        return a+b;
        }
        var result=calculateSum(10,20);
        console.log(result);
    """.trimIndent()
    val formattedCode = codeText
        // Normalise two-argument call/definition lists to "name(a, b)".
        .replace(Regex("""(\w+)\s*\(\s*(\w+)\s*,\s*(\w+)\s*\)""")) { match ->
            "${match.groupValues[1]}(${match.groupValues[2]}, ${match.groupValues[3]})"
        }
        // Put single spaces around "=".
        .replace(Regex("""(\w+)\s*=\s*(\w+)""")) { match ->
            "${match.groupValues[1]} = ${match.groupValues[2]}"
        }
        .replace(Regex("""\{\s*"""), " {\n ")
        .replace(Regex("""\s*\}"""), "\n}")
        .replace(Regex(""";(\w)"""), ";\n$1")
    println("代码格式化:")
    println("原代码:")
    println(codeText)
    println("\n格式化后:")
    println(formattedCode)
    println()

    // 3. Template substitution: unknown {{placeholders}} are left untouched.
    val template = "Hello {{name}}, your order #{{orderNumber}} for {{amount}} has been {{status}}."
    val variables = mapOf(
        "name" to "张三",
        "orderNumber" to "12345",
        "amount" to "$99.99",
        "status" to "confirmed"
    )
    val processedTemplate = template.replace(Regex("""\{\{(\w+)\}\}""")) { match ->
        val variableName = match.groupValues[1]
        variables[variableName] ?: match.value
    }
    println("模板替换:")
    println("模板: $template")
    println("处理后: $processedTemplate")
    println()

    // 4. Masking sensitive numbers.
    // Each rule is fenced with (?<!\d) and (?!\d) so it only fires on a digit
    // run of exactly its own length. Unfenced, the 18-digit ID rule also matched
    // the first 18 digits of the 19-digit card number, which was then masked
    // with the wrong layout and never reached the bank-card rule.
    val sensitiveText = """
        身份证号:110101199001011234
        银行卡号:6222021234567890123
        手机号:13812345678
    """.trimIndent()
    val desensitizedText = sensitiveText
        .replace(Regex("""(?<!\d)(\d{6})\d{8}(\d{4})(?!\d)""")) { match -> // 18-digit ID number
            "${match.groupValues[1]}********${match.groupValues[2]}"
        }
        .replace(Regex("""(?<!\d)(\d{4})\d{11}(\d{4})(?!\d)""")) { match -> // 19-digit bank card
            "${match.groupValues[1]}***********${match.groupValues[2]}"
        }
        .replace(Regex("""(?<!\d)(\d{3})\d{4}(\d{4})(?!\d)""")) { match -> // 11-digit mobile number
            "${match.groupValues[1]}****${match.groupValues[2]}"
        }
    println("数据脱敏:")
    println("原文:")
    println(sensitiveText)
    println("\n脱敏后:")
    println(desensitizedText)
}
数据验证
常用验证模式
kotlin
/**
 * Format validators for common user-input fields (e-mail, mobile number,
 * password, ID card, URL, IPv4 address, date).
 *
 * Every validator returns a [ValidationResult]: [ValidationResult.Success] or a
 * [ValidationResult.Error] carrying a user-facing message.
 */
class DataValidator {
    companion object {
        // Shared patterns, compiled once.
        val EMAIL_PATTERN = Regex("""^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$""")
        val PHONE_PATTERN = Regex("""^1[3-9]\d{9}$""") // mainland-China mobile number
        val ID_CARD_PATTERN = Regex("""^\d{17}[\dXx]$""") // 18-character ID card number
        val PASSWORD_PATTERN = Regex("""^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$""")
        val URL_PATTERN = Regex("""^https?://[^\s/$.?#].[^\s]*$""")
        val IPV4_PATTERN = Regex("""^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$""")
        val DATE_PATTERN = Regex("""^\d{4}-\d{2}-\d{2}$""")
        val TIME_PATTERN = Regex("""^([01]?[0-9]|2[0-3]):[0-5][0-9]$""")
        val CREDIT_CARD_PATTERN = Regex("""^\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}$""")

        // Single-requirement password checks used by validatePassword so that it
        // can report which rule failed. Hoisted here so they are compiled once
        // rather than on every call.
        private val LOWERCASE_LETTER = Regex("[a-z]")
        private val UPPERCASE_LETTER = Regex("[A-Z]")
        private val DIGIT = Regex("""\d""")
        private val SPECIAL_CHARACTER = Regex("""[@$!%*?&]""")

        // Per-position weights for the first 17 ID digits, and the check
        // character selected by (weighted sum mod 11).
        private val ID_CARD_WEIGHTS = intArrayOf(7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
        private val ID_CARD_CHECK_CODES = charArrayOf('1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2')
    }

    /** Succeeds when [email] matches [EMAIL_PATTERN] in full. */
    fun validateEmail(email: String): ValidationResult =
        matchOrError(EMAIL_PATTERN, email, "邮箱格式不正确")

    /** Succeeds when [phone] matches [PHONE_PATTERN] in full. */
    fun validatePhone(phone: String): ValidationResult =
        matchOrError(PHONE_PATTERN, phone, "手机号格式不正确")

    /**
     * Checks the password rules one at a time and reports the first one that
     * fails: length of at least 8, then a lowercase letter, an uppercase letter,
     * a digit, and one of the characters `@$!%*?&`.
     */
    fun validatePassword(password: String): ValidationResult = when {
        password.length < 8 -> ValidationResult.Error("密码长度至少8位")
        !LOWERCASE_LETTER.containsMatchIn(password) -> ValidationResult.Error("密码必须包含小写字母")
        !UPPERCASE_LETTER.containsMatchIn(password) -> ValidationResult.Error("密码必须包含大写字母")
        !DIGIT.containsMatchIn(password) -> ValidationResult.Error("密码必须包含数字")
        !SPECIAL_CHARACTER.containsMatchIn(password) -> ValidationResult.Error("密码必须包含特殊字符")
        else -> ValidationResult.Success
    }

    /**
     * Validates the shape of [idCard] against [ID_CARD_PATTERN], then verifies
     * its last character against the weighted checksum of the first 17 digits.
     * A lowercase trailing `x` is accepted.
     */
    fun validateIdCard(idCard: String): ValidationResult {
        if (!ID_CARD_PATTERN.matches(idCard)) {
            return ValidationResult.Error("身份证号格式不正确")
        }
        // The pattern guarantees 17 leading ASCII digits, so digitToInt cannot fail.
        val weightedSum = ID_CARD_WEIGHTS.indices.sumOf { index ->
            idCard[index].digitToInt() * ID_CARD_WEIGHTS[index]
        }
        val expectedCheckCode = ID_CARD_CHECK_CODES[weightedSum % 11]
        val actualCheckCode = idCard.last().uppercaseChar()
        return if (expectedCheckCode == actualCheckCode) {
            ValidationResult.Success
        } else {
            ValidationResult.Error("身份证号校验位不正确")
        }
    }

    /** Succeeds when [url] matches [URL_PATTERN] in full. */
    fun validateUrl(url: String): ValidationResult =
        matchOrError(URL_PATTERN, url, "URL格式不正确")

    /** Succeeds when [ip] matches [IPV4_PATTERN] in full. */
    fun validateIPv4(ip: String): ValidationResult =
        matchOrError(IPV4_PATTERN, ip, "IPv4地址格式不正确")

    /**
     * Validates a `YYYY-MM-DD` date: shape first, then year within 1900..2100,
     * month within 1..12, and a day that exists in that month (leap years
     * included).
     */
    fun validateDate(date: String): ValidationResult {
        if (!DATE_PATTERN.matches(date)) {
            return ValidationResult.Error("日期格式不正确,应为YYYY-MM-DD")
        }
        // The pattern guarantees three all-digit parts, so toInt cannot fail.
        val (year, month, day) = date.split("-").map { it.toInt() }
        return when {
            year < 1900 || year > 2100 -> ValidationResult.Error("年份应在1900-2100之间")
            month < 1 || month > 12 -> ValidationResult.Error("月份应在1-12之间")
            day < 1 || day > 31 -> ValidationResult.Error("日期应在1-31之间")
            month == 2 && day > 29 -> ValidationResult.Error("2月份日期不能超过29")
            month == 2 && day == 29 && !isLeapYear(year) -> ValidationResult.Error("非闰年2月不能有29日")
            (month == 4 || month == 6 || month == 9 || month == 11) && day > 30 ->
                ValidationResult.Error("该月份只有30天")
            else -> ValidationResult.Success
        }
    }

    /** Gregorian leap-year rule. */
    private fun isLeapYear(year: Int): Boolean {
        return (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)
    }

    /** Shared shape of the pattern-only validators: full match or [message]. */
    private fun matchOrError(pattern: Regex, input: String, message: String): ValidationResult =
        if (pattern.matches(input)) ValidationResult.Success else ValidationResult.Error(message)

    /** Outcome of a validation: either [Success] or an [Error] with a message. */
    sealed class ValidationResult {
        object Success : ValidationResult()
        data class Error(val message: String) : ValidationResult()
    }
}
/**
 * Runs every [DataValidator] check against a few sample inputs and prints one
 * verdict line per input, grouped by field type.
 */
fun main() {
    println("=== 数据验证示例 ===")
    val validator = DataValidator()

    // Each suite pairs a heading with its validator and the inputs to try.
    val suites: List<Triple<String, (String) -> DataValidator.ValidationResult, List<String>>> = listOf(
        Triple("邮箱", validator::validateEmail, listOf("user@example.com", "invalid-email", "test@domain")),
        Triple("手机号", validator::validatePhone, listOf("13812345678", "12345678901", "1381234567")),
        Triple("密码", validator::validatePassword, listOf("Password123!", "password", "PASSWORD123", "Pass123")),
        Triple("身份证", validator::validateIdCard, listOf("110101199001011234", "11010119900101123X", "123456789012345678")),
        Triple("URL", validator::validateUrl, listOf("https://www.example.com", "http://test.org", "invalid-url")),
        Triple("IP地址", validator::validateIPv4, listOf("192.168.1.1", "255.255.255.255", "256.1.1.1", "192.168.1")),
        Triple("日期", validator::validateDate, listOf("2023-12-25", "2023-02-29", "2024-02-29", "2023-13-01")),
    )

    for ((label, validate, inputs) in suites) {
        println("=== $label 验证 ===")
        for (input in inputs) {
            val verdict = when (val outcome = validate(input)) {
                is DataValidator.ValidationResult.Success -> "✓ 有效"
                is DataValidator.ValidationResult.Error -> "✗ ${outcome.message}"
            }
            println("$input: $verdict")
        }
        println()
    }
}
实际应用示例
日志分析器
kotlin
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
/**
 * One parsed log line.
 *
 * @property timestamp when the line was logged
 * @property level severity token taken from the line (e.g. "ERROR")
 * @property logger name of the logger that produced the line
 * @property message free-text remainder of the line
 * @property thread optional thread field; defaults to null
 * @property exception optional exception text; defaults to null
 */
data class LogEntry(
val timestamp: LocalDateTime,
val level: String,
val logger: String,
val message: String,
val thread: String? = null,
val exception: String? = null
)
/**
 * Parses plain-text log lines into [LogEntry] values and derives simple
 * statistics from them: error counts, IP addresses and suspicious messages.
 */
class LogAnalyzer {
    // Line layouts keyed by name. Only "standard" and "detailed" are consulted
    // when parsing; "apache" is declared but not used anywhere in this class.
    private val logPatterns = mapOf(
        "standard" to Regex("""(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+(\w+)\s+\[([^\]]+)\]\s+(.+)"""),
        "detailed" to Regex("""(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3})\s+(\w+)\s+(\d+)\s+---\s+\[([^\]]+)\]\s+([^:]+):\s+(.+)"""),
        "apache" to Regex("""(\d+\.\d+\.\d+\.\d+)\s+-\s+-\s+\[([^\]]+)\]\s+"([^"]+)"\s+(\d+)\s+(\d+)""")
    )
    private val dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
    private val detailedDateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS")

    // Compiled once per analyzer instead of on every call.
    private val exceptionPattern = Regex("""([a-zA-Z.]+Exception)""")
    private val ipPattern = Regex("""\b(?:\d{1,3}\.){3}\d{1,3}\b""")
    private val suspiciousPatterns = listOf(
        Regex("""(?i)sql\s+injection"""),
        Regex("""(?i)xss\s+attack"""),
        Regex("""(?i)unauthorized\s+access"""),
        Regex("""(?i)brute\s+force"""),
        Regex("""(?i)ddos"""),
        Regex("""(?i)malware""")
    )

    /**
     * Parses [logContent] line by line. Blank lines and lines that match no
     * known layout (or carry an impossible timestamp) are skipped.
     */
    fun parseLogFile(logContent: String): List<LogEntry> =
        logContent.lineSequence()
            .filter { it.isNotBlank() }
            .mapNotNull { parseLogLine(it) }
            .toList()

    /**
     * Tries the standard layout, then the detailed one. Returns null when
     * neither matches. A timestamp that has the right shape but is not a real
     * date/time (e.g. month 13) also yields null rather than aborting the parse.
     */
    private fun parseLogLine(line: String): LogEntry? = try {
        parseStandard(line) ?: parseDetailed(line)
    } catch (e: java.time.format.DateTimeParseException) {
        null
    }

    /** Standard layout: `timestamp LEVEL [logger] message`. */
    private fun parseStandard(line: String): LogEntry? {
        val match = logPatterns["standard"]?.find(line) ?: return null
        val (timestamp, level, logger, message) = match.destructured
        return LogEntry(
            timestamp = LocalDateTime.parse(timestamp, dateTimeFormatter),
            level = level,
            logger = logger,
            message = message
        )
    }

    /** Detailed layout: `timestamp LEVEL number --- [thread] logger: message`. */
    private fun parseDetailed(line: String): LogEntry? {
        val match = logPatterns["detailed"]?.find(line) ?: return null
        // Groups are 1 timestamp, 2 level, 3 numeric field, 4 bracketed thread,
        // 5 logger, 6 message. Reading 3/4/5 here shifted every field by one and
        // dropped the message.
        val (timestamp, level, _, thread, logger, message) = match.destructured
        return LogEntry(
            timestamp = LocalDateTime.parse(timestamp, detailedDateTimeFormatter),
            level = level,
            thread = thread,
            logger = logger,
            message = message
        )
    }

    /**
     * Summarises the entries whose level is exactly "ERROR".
     *
     * @return a map with keys "totalErrors" (Int), "errorsByLogger"
     * (Map<String, Int>), "errorsByHour" (Map<Int, Int>) and "topExceptions"
     * (up to five exception-name/count pairs, most frequent first)
     */
    fun analyzeErrors(entries: List<LogEntry>): Map<String, Any> {
        val errorEntries = entries.filter { it.level == "ERROR" }
        val errorsByLogger = errorEntries.groupBy { it.logger }
        val errorsByHour = errorEntries.groupBy { it.timestamp.hour }
        // First "...Exception" token of each error message, counted by name.
        val exceptionTypes = errorEntries.mapNotNull { entry ->
            exceptionPattern.find(entry.message)?.groupValues?.get(1)
        }.groupingBy { it }.eachCount()
        return mapOf(
            "totalErrors" to errorEntries.size,
            "errorsByLogger" to errorsByLogger.mapValues { it.value.size },
            "errorsByHour" to errorsByHour.mapValues { it.value.size },
            "topExceptions" to exceptionTypes.toList().sortedByDescending { it.second }.take(5)
        )
    }

    /** Collects every dotted-quad token found in the entries' messages. */
    fun extractIpAddresses(entries: List<LogEntry>): Set<String> =
        entries.asSequence()
            .flatMap { entry -> ipPattern.findAll(entry.message) }
            .map { it.value }
            .toSet()

    /**
     * Returns "timestamp: message" for each entry whose message matches at
     * least one suspicious pattern. An entry is listed once even when several
     * patterns match it.
     */
    fun findSuspiciousActivity(entries: List<LogEntry>): List<String> =
        entries
            .filter { entry -> suspiciousPatterns.any { it.containsMatchIn(entry.message) } }
            .map { entry -> "${entry.timestamp}: ${entry.message}" }

    /** Builds the multi-section text report for [entries]. */
    fun generateReport(entries: List<LogEntry>): String {
        val errorAnalysis = analyzeErrors(entries)
        val ipAddresses = extractIpAddresses(entries)
        val suspiciousActivity = findSuspiciousActivity(entries)
        return buildString {
            appendLine("=== 日志分析报告 ===")
            appendLine("总日志条数: ${entries.size}")
            appendLine("时间范围: ${entries.minByOrNull { it.timestamp }?.timestamp} 到 ${entries.maxByOrNull { it.timestamp }?.timestamp}")
            appendLine()
            appendLine("=== 错误分析 ===")
            appendLine("总错误数: ${errorAnalysis["totalErrors"]}")
            // analyzeErrors returns Map<String, Any>; the casts below restore
            // the value types it stores under these keys.
            @Suppress("UNCHECKED_CAST")
            val errorsByLogger = errorAnalysis["errorsByLogger"] as Map<String, Int>
            appendLine("按日志器分组的错误:")
            errorsByLogger.forEach { (logger, count) ->
                appendLine(" $logger: $count")
            }
            @Suppress("UNCHECKED_CAST")
            val topExceptions = errorAnalysis["topExceptions"] as List<Pair<String, Int>>
            appendLine("主要异常类型:")
            topExceptions.forEach { (exception, count) ->
                appendLine(" $exception: $count")
            }
            appendLine()
            appendLine("=== IP地址统计 ===")
            appendLine("发现的IP地址数量: ${ipAddresses.size}")
            ipAddresses.take(10).forEach { ip ->
                appendLine(" $ip")
            }
            appendLine()
            appendLine("=== 可疑活动 ===")
            if (suspiciousActivity.isNotEmpty()) {
                appendLine("发现 ${suspiciousActivity.size} 条可疑活动:")
                suspiciousActivity.take(5).forEach { activity ->
                    appendLine(" $activity")
                }
            } else {
                appendLine("未发现可疑活动")
            }
        }
    }
}
/**
 * Feeds a small in-memory log through [LogAnalyzer] and prints the number of
 * parsed entries followed by the generated report.
 */
fun main() {
    println("=== 日志分析器示例 ===")

    // Simulated log content.
    val sampleLog = """
        2023-12-25 10:30:15 INFO [com.example.UserService] User login successful: user123
        2023-12-25 10:30:16 DEBUG [com.example.DatabaseConnection] Connection established to 192.168.1.100
        2023-12-25 10:30:17 ERROR [com.example.PaymentService] Payment failed: java.lang.NullPointerException at line 45
        2023-12-25 10:30:18 WARN [com.example.SecurityService] Multiple failed login attempts from 192.168.1.200
        2023-12-25 10:30:19 ERROR [com.example.UserService] SQL injection attempt detected from 10.0.0.50
        2023-12-25 10:30:20 INFO [com.example.OrderService] Order created successfully: order456
        2023-12-25 10:30:21 ERROR [com.example.DatabaseConnection] Connection timeout: java.sql.SQLException
        2023-12-25 10:30:22 WARN [com.example.SecurityService] Suspicious XSS attack pattern detected
        2023-12-25 10:30:23 INFO [com.example.UserService] User logout: user123
        2023-12-25 10:30:24 ERROR [com.example.PaymentService] Unauthorized access attempt from 172.16.0.10
    """.trimIndent()

    with(LogAnalyzer()) {
        // Parse, then report.
        val parsed = parseLogFile(sampleLog)
        println("解析了 ${parsed.size} 条日志记录")
        println()
        println(generateReport(parsed))
    }
}
性能优化
正则表达式优化技巧
kotlin
import kotlin.system.measureTimeMillis
/**
 * Small timing demos comparing regex usage patterns. The timings are
 * single-shot wall-clock measurements and are illustrative only.
 */
class RegexPerformanceDemo {
    /** Compares compiling the pattern inside the loop with compiling it once. */
    fun demonstrateCompilation() {
        println("=== 正则表达式编译优化 ===")
        // String has no `*` operator in Kotlin; repeat() builds the long input.
        val testText = "The quick brown fox jumps over the lazy dog. ".repeat(1000)
        val pattern = """\b\w{5}\b""" // words of exactly five word characters

        // Slow: recompiles the pattern on every iteration.
        val time1 = measureTimeMillis {
            repeat(1000) {
                pattern.toRegex().findAll(testText).count()
            }
        }
        // Better: compile once, reuse.
        val compiledRegex = pattern.toRegex()
        val time2 = measureTimeMillis {
            repeat(1000) {
                compiledRegex.findAll(testText).count()
            }
        }
        println("重复编译耗时: ${time1}ms")
        println("预编译耗时: ${time2}ms")
        println("性能提升: ${speedup(time1, time2)}")
        println()
    }

    /** Compares an unanchored search with one anchored at the start of input. */
    fun demonstrateAnchoring() {
        println("=== 锚点优化 ===")
        val testStrings = (1..10000).map { "test string $it with some content" }
        // Unanchored: may be attempted at every position of the input.
        val unanchoredRegex = Regex("""test string \d+""")
        // Anchored: can only match at the start of the input.
        val anchoredRegex = Regex("""^test string \d+""")
        val time1 = measureTimeMillis {
            testStrings.forEach { unanchoredRegex.containsMatchIn(it) }
        }
        val time2 = measureTimeMillis {
            testStrings.forEach { anchoredRegex.containsMatchIn(it) }
        }
        println("无锚点耗时: ${time1}ms")
        println("有锚点耗时: ${time2}ms")
        println("性能提升: ${speedup(time1, time2)}")
        println()
    }

    /** Times three ways of finding the fruit words in a short text. */
    fun demonstrateAlternatives() {
        println("=== 选择优化 ===")
        val testText = "apple banana cherry date elderberry fig grape"
        // Plain alternation.
        val complexRegex = Regex("""apple|banana|cherry|date|elderberry|fig|grape""")
        // Same alternatives in a different order (intended as "most frequent first").
        val optimizedRegex = Regex("""apple|banana|cherry|grape|date|fig|elderberry""")
        // Character-class form. NOTE: this is not equivalent to the alternations:
        // it matches any word of two or more characters starting with a-g.
        val characterClassRegex = Regex("""\b[abcdefg]\w+""")
        val iterations = 100000
        val time1 = measureTimeMillis {
            repeat(iterations) { complexRegex.findAll(testText).count() }
        }
        val time2 = measureTimeMillis {
            repeat(iterations) { optimizedRegex.findAll(testText).count() }
        }
        val time3 = measureTimeMillis {
            repeat(iterations) { characterClassRegex.findAll(testText).count() }
        }
        println("复杂选择耗时: ${time1}ms")
        println("优化选择耗时: ${time2}ms")
        println("字符类耗时: ${time3}ms")
        println()
    }

    /**
     * Formats slow/fast as a ratio with two decimals. When the fast run took
     * under one millisecond the ratio is undefined (division by zero would
     * print "Infinity" or "NaN"), so a short notice is returned instead.
     */
    private fun speedup(slowMillis: Long, fastMillis: Long): String =
        if (fastMillis > 0) {
            "${(slowMillis.toDouble() / fastMillis).format(2)}倍"
        } else {
            "无法计算(耗时不足1ms)"
        }

    /** Fixed-point formatting with [digits] decimals. */
    private fun Double.format(digits: Int) = "%.${digits}f".format(this)
}
/** Runs the three timing demos, then prints the best-practice summary. */
fun main() {
    RegexPerformanceDemo().run {
        demonstrateCompilation()
        demonstrateAnchoring()
        demonstrateAlternatives()
    }

    println("=== 最佳实践总结 ===")
    val tips = listOf(
        "1. 预编译经常使用的正则表达式",
        "2. 使用锚点(^, $)限制搜索范围",
        "3. 优化选择的顺序,常见的放在前面",
        "4. 使用字符类代替复杂的选择",
        "5. 避免过度使用回溯",
        "6. 使用非捕获组(?:)当不需要捕获时",
    )
    for (tip in tips) {
        println(tip)
    }
}
最佳实践
1. 正则表达式设计原则
kotlin
/**
 * Good practice: keep regexes readable. Patterns live in named constants,
 * long ones are commented, complex ones are assembled from named pieces, and
 * callers get validation methods instead of the raw patterns.
 */
class RegexBestPractices {
    /** True when [email] matches [EMAIL_REGEX] in full. */
    fun isValidEmail(email: String): Boolean {
        return EMAIL_REGEX.matches(email)
    }

    /** True when [phone] matches [PHONE_REGEX] in full. */
    fun isValidPhone(phone: String): Boolean {
        return PHONE_REGEX.matches(phone)
    }

    companion object {
        // Named constant; COMMENTS mode lets each part be annotated in place.
        val EMAIL_REGEX = Regex(
            """
            ^[a-zA-Z0-9._%+-]+ # 用户名部分
            @ # @ 符号
            [a-zA-Z0-9.-]+ # 域名部分
            \. # 点号
            [a-zA-Z]{2,}$ # 顶级域名
            """.trimIndent(),
            RegexOption.COMMENTS,
        )

        // A complex pattern broken into named pieces.
        private const val AREA_CODE_PART = """\d{3}"""
        private const val EXCHANGE_PART = """\d{3}"""
        private const val LINE_NUMBER_PART = """\d{4}"""
        val PHONE_REGEX = Regex(
            listOf(AREA_CODE_PART, EXCHANGE_PART, LINE_NUMBER_PART)
                .joinToString(separator = "-", prefix = "^", postfix = "$")
        )
    }
}
2. 错误处理
kotlin
/**
 * Compiles [pattern] and collects every match in [text].
 *
 * @return a successful [Result] holding the matched substrings in order, or a
 * failed [Result] wrapping any [Exception] thrown while compiling or matching
 * (for example an invalid pattern)
 */
fun safeRegexOperation(pattern: String, text: String): Result<List<String>> =
    try {
        val found = mutableListOf<String>()
        for (hit in pattern.toRegex().findAll(text)) {
            found += hit.value
        }
        Result.success<List<String>>(found)
    } catch (failure: Exception) {
        Result.failure(failure)
    }
3. 测试正则表达式
kotlin
/** Self-checking demo for the e-mail pattern. */
class RegexTester {
    /**
     * Prints a ✓/✗ line for each sample address and asserts the expectation:
     * the valid samples must match and the invalid ones must not.
     */
    fun testEmailRegex() {
        val pattern = Regex("""^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$""")
        val shouldMatch = listOf(
            "user@example.com",
            "test.email@domain.org",
            "user+tag@example.co.uk",
        )
        val shouldNotMatch = listOf(
            "invalid-email",
            "@domain.com",
            "user@",
            "user@domain",
        )

        println("=== 邮箱正则表达式测试 ===")
        for (candidate in shouldMatch) {
            val matched = pattern.matches(candidate)
            val mark = if (matched) "✓" else "✗"
            println("$candidate: $mark")
            assert(matched) { "应该匹配: $candidate" }
        }
        for (candidate in shouldNotMatch) {
            val rejected = !pattern.matches(candidate)
            val mark = if (rejected) "✓" else "✗"
            println("$candidate: $mark")
            assert(rejected) { "不应该匹配: $candidate" }
        }
    }
}
下一步
掌握了正则表达式后,让我们学习Kotlin的标准库,了解丰富的内置函数和工具类。
下一章: 标准库
练习题
- 编写一个文本处理工具,支持多种格式的数据提取和转换
- 创建一个表单验证系统,验证各种用户输入格式
- 实现一个简单的模板引擎,支持变量替换和条件渲染
- 设计一个代码格式化工具,使用正则表达式美化代码
- 开发一个日志监控系统,实时分析日志文件并报警异常模式