JavaScript 正则表达式
正则表达式(Regular Expression,简称 RegExp)是一种强大的文本处理工具,用于匹配、查找、替换和验证字符串模式。在 JavaScript 中,正则表达式广泛应用于表单验证、数据清洗、文本解析等场景。掌握正则表达式对于处理复杂的字符串操作至关重要。在本章节中,我们将深入学习 JavaScript 中正则表达式的使用方法。
什么是正则表达式
正则表达式是一种特殊的字符串模式,用于描述一系列符合某个句法规则的字符串。它提供了一种简洁而灵活的方式来匹配、查找和管理文本。
javascript
// 简单的正则表达式示例
const pattern = /hello/;
const text = "hello world";
console.log(pattern.test(text)); // true
创建正则表达式
在 JavaScript 中有两种方式创建正则表达式:
1. 字面量语法(推荐)
javascript
const regex = /pattern/flags;
const emailPattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
const phonePattern = /^\d{3}-\d{3}-\d{4}$/;
2. RegExp 构造函数
javascript
const regex = new RegExp("pattern", "flags");
const emailPattern = new RegExp("^[^\\s@]+@[^\\s@]+\\.[^\\s@]+$");
const phonePattern = new RegExp("^\\d{3}-\\d{3}-\\d{4}$");
// 动态创建正则表达式
const searchTerm = "JavaScript";
const searchRegex = new RegExp(searchTerm, "gi");
正则表达式标志(Flags)
标志用于修改正则表达式的行为:
javascript
// g - 全局匹配
const text = "hello hello hello";
const globalRegex = /hello/g;
console.log(text.match(globalRegex)); // ["hello", "hello", "hello"]
// i - 忽略大小写
const caseInsensitive = /hello/i;
console.log(caseInsensitive.test("HELLO")); // true
// m - 多行匹配
const multiline = /^hello/m;
const multilineText = "world\nhello\njavascript";
console.log(multiline.test(multilineText)); // true
// s - dotAll 模式(ES2018)
const dotAll = /hello.world/s;
console.log(dotAll.test("hello\nworld")); // true (没有 s 标志时为 false)
// u - Unicode 模式
const unicode = /\u{1F600}/u;
console.log(unicode.test("😀")); // true
// y - 粘性匹配
const sticky = /hello/y;
const stickyText = "hello world";
console.log(sticky.test(stickyText)); // true
sticky.lastIndex = 1;
console.log(sticky.test(stickyText)); // false
正则表达式模式语法
字面量字符
javascript
// 普通字符直接匹配
const regex1 = /hello/;
console.log(regex1.test("hello world")); // true
// 特殊字符需要转义
const regex2 = /\$/; // 匹配美元符号
console.log(regex2.test("$100")); // true
const regex3 = /\./; // 匹配点号
console.log(regex3.test("a.b")); // true
字符类
javascript
// 简单字符类
const vowel = /[aeiou]/;
console.log(vowel.test("hello")); // true
// 范围字符类
const digit = /[0-9]/;
console.log(digit.test("abc123")); // true
const letter = /[a-zA-Z]/;
console.log(letter.test("123abc")); // true
// 反向字符类
const nonDigit = /[^0-9]/;
console.log(nonDigit.test("123a")); // true
// 预定义字符类
console.log(/\d/.test("123")); // true (数字)
console.log(/\D/.test("abc")); // true (非数字)
console.log(/\w/.test("hello")); // true (单词字符)
console.log(/\W/.test("!?")); // true (非单词字符)
console.log(/\s/.test(" ")); // true (空白字符)
console.log(/\S/.test("a")); // true (非空白字符)
量词
javascript
// 精确数量
const exactlyThree = /a{3}/;
console.log(exactlyThree.test("aaab")); // true
// 范围数量
const twoToFour = /a{2,4}/;
console.log(twoToFour.test("aaa")); // true
// 至少数量
const atLeastTwo = /a{2,}/;
console.log(atLeastTwo.test("aaaa")); // true
// 零次或一次
const optional = /colou?r/;
console.log(optional.test("color")); // true
console.log(optional.test("colour")); // true
// 零次或多次
const zeroOrMore = /a*/;
console.log(zeroOrMore.test("b")); // true
console.log(zeroOrMore.test("aaa")); // true
// 一次或多次
const oneOrMore = /a+/;
console.log(oneOrMore.test("a")); // true
console.log(oneOrMore.test("aaaa")); // true
console.log(oneOrMore.test("b")); // false
边界匹配
javascript
// 单词边界
const wordBoundary = /\bhello\b/;
console.log(wordBoundary.test("hello world")); // true
console.log(wordBoundary.test("hello-world")); // true
console.log(wordBoundary.test("sayhello")); // false
// 行首匹配
const start = /^hello/;
console.log(start.test("hello world")); // true
console.log(start.test("say hello")); // false
// 行尾匹配
const end = /world$/;
console.log(end.test("hello world")); // true
console.log(end.test("world hello")); // false
// 字符串开始和结束
const entireString = /^hello world$/;
console.log(entireString.test("hello world")); // true
console.log(entireString.test("hello world!")); // false
分组和捕获
javascript
// 简单分组
const phone = /(\d{3})-(\d{3})-(\d{4})/;
const match = "123-456-7890".match(phone);
console.log(match[0]); // "123-456-7890" (完整匹配)
console.log(match[1]); // "123" (第一个分组)
console.log(match[2]); // "456" (第二个分组)
console.log(match[3]); // "7890" (第三个分组)
// 非捕获分组
const nonCapturing = /(?:https?):\/\/[^\s]+/;
console.log(nonCapturing.test("https://example.com")); // true
// 命名捕获组(ES2018)
const namedGroups = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/;
const dateMatch = "2024-01-15".match(namedGroups);
console.log(dateMatch.groups.year); // "2024"
console.log(dateMatch.groups.month); // "01"
console.log(dateMatch.groups.day); // "15"
选择和前瞻
javascript
// 选择操作符
const choice = /cat|dog/;
console.log(choice.test("I have a cat")); // true
console.log(choice.test("I have a dog")); // true
// 正向前瞻
const positiveLookahead = /password(?=\d)/;
console.log(positiveLookahead.test("password123")); // true
console.log(positiveLookahead.test("password")); // false
// 负向前瞻
const negativeLookahead = /password(?!\d)/;
console.log(negativeLookahead.test("password")); // true
console.log(negativeLookahead.test("password123")); // false
// 正向后顾(ES2018)
const positiveLookbehind = /(?<=\d)\$/;
console.log(positiveLookbehind.test("100$")); // true
console.log(positiveLookbehind.test("$100")); // false
// 负向后顾(ES2018)
const negativeLookbehind = /(?<!\d)\$/;
console.log(negativeLookbehind.test("$100")); // true
console.log(negativeLookbehind.test("100$")); // false
正则表达式对象的方法
test() 方法
javascript
const regex = /hello/;
console.log(regex.test("hello world")); // true
console.log(regex.test("goodbye")); // false
exec() 方法
javascript
const regex = /(\d{4})-(\d{2})-(\d{2})/;
const result = regex.exec("Today is 2024-01-15");
console.log(result[0]); // "2024-01-15"
console.log(result[1]); // "2024"
console.log(result[2]); // "01"
console.log(result[3]); // "15"
console.log(result.index); // 9
console.log(result.input); // "Today is 2024-01-15"
字符串对象的正则方法
match() 方法
javascript
const text = "The phone numbers are 123-456-7890 and 098-765-4321";
// 不使用全局标志
const singleMatch = text.match(/\d{3}-\d{3}-\d{4}/);
console.log(singleMatch); // ["123-456-7890"]
// 使用全局标志
const allMatches = text.match(/\d{3}-\d{3}-\d{4}/g);
console.log(allMatches); // ["123-456-7890", "098-765-4321"]
// 捕获组
const capturing = text.match(/(\d{3})-(\d{3})-(\d{4})/);
console.log(capturing[1]); // "123"
console.log(capturing[2]); // "456"
console.log(capturing[3]); // "7890"
search() 方法
javascript
const text = "Hello world, welcome to JavaScript";
console.log(text.search(/world/)); // 6
console.log(text.search(/javascript/i)); // 24
console.log(text.search(/python/)); // -1 (未找到)
replace() 方法
javascript
const text = "Hello world, hello JavaScript";
// 基本替换
console.log(text.replace(/hello/i, "Hi")); // "Hi world, hello JavaScript"
// 全局替换
console.log(text.replace(/hello/gi, "Hi")); // "Hi world, Hi JavaScript"
// 使用捕获组
const phone = "Call me at 123-456-7890";
console.log(phone.replace(/(\d{3})-(\d{3})-(\d{4})/, "($1) $2-$3")); // "Call me at (123) 456-7890"
// 使用函数替换
const prices = "The price is $29.99 and $15.50";
console.log(prices.replace(/\$(\d+\.\d+)/g, (match, price) => {
return `$${(parseFloat(price) * 1.1).toFixed(2)}`;
})); // "The price is $32.99 and $17.05"
split() 方法
javascript
const text = "apple,banana;orange:grape";
// 使用正则表达式分割
console.log(text.split(/[,;:]/)); // ["apple", "banana", "orange", "grape"]
// 限制分割数量
console.log(text.split(/[,;:]/, 2)); // ["apple", "banana"]
正则表达式的高级特性
Unicode 属性转义(ES2018)
javascript
// 匹配所有字母
const letter = /\p{L}/u;
console.log(letter.test("Hello")); // true
console.log(letter.test("你好")); // true
// 匹配数字
const number = /\p{N}/u;
console.log(number.test("123")); // true
console.log(number.test("一二三")); // false (汉字数字的 Unicode 通用类别是字母 Lo,不属于 \p{N};"①②③"、"ⅠⅡⅢ" 这类字符才会匹配)
// 匹配标点符号
const punctuation = /\p{P}/u;
console.log(punctuation.test("!")); // true
console.log(punctuation.test("!")); // true (全角标点)
回溯引用
javascript
// 匹配重复的单词
const repeatedWord = /\b(\w+)\s+\1\b/;
console.log(repeatedWord.test("hello hello world")); // true
// 匹配回文结构
const palindrome = /^(\w)\w?\1$/;
console.log(palindrome.test("aba")); // true
console.log(palindrome.test("abba")); // false (需要更复杂的模式)
常用正则表达式模式
表单验证
javascript
// 邮箱验证
const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
console.log(emailRegex.test("user@example.com")); // true
// 电话号码验证
const phoneRegex = /^\d{3}-\d{3}-\d{4}$/;
console.log(phoneRegex.test("123-456-7890")); // true
// 密码强度验证(至少8位,包含大小写字母和数字)
const passwordRegex = /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)[a-zA-Z\d@$!%*?&]{8,}$/;
console.log(passwordRegex.test("Password123")); // true
// 邮政编码验证
const zipCodeRegex = /^\d{5}(-\d{4})?$/;
console.log(zipCodeRegex.test("12345")); // true
console.log(zipCodeRegex.test("12345-6789")); // true
数据提取
javascript
// 提取 URL 参数
const url = "https://example.com?name=张三&age=25&city=北京";
const paramRegex = /[?&]([^=]+)=([^&]*)/g;
const params = {};
let match;
while ((match = paramRegex.exec(url)) !== null) {
params[decodeURIComponent(match[1])] = decodeURIComponent(match[2]);
}
console.log(params); // { name: "张三", age: "25", city: "北京" }
// 提取 HTML 标签内容
const html = "<p>Hello</p><div>World</div>";
const tagRegex = /<(\w+)>(.*?)<\/\1>/g;
const tags = [];
while ((match = tagRegex.exec(html)) !== null) {
tags.push({
tag: match[1],
content: match[2]
});
}
console.log(tags); // [{ tag: "p", content: "Hello" }, { tag: "div", content: "World" }]
正则表达式的性能优化
1. 避免回溯灾难
javascript
// 不好的模式(可能导致回溯灾难)
const badPattern = /(a+)+b/;
// 好的模式
const goodPattern = /a+b/;
2. 使用具体量词
javascript
// 不明确的量词
const vague = /\w*/;
// 具体量词
const specific = /\w{0,100}/;
3. 避免不必要的捕获
javascript
// 不必要的捕获
const unnecessary = /(\w+)@(\w+)\.(\w+)/;
// 非捕获分组
const nonCapturing = /(?:\w+)@(?:\w+)\.(?:\w+)/;
正则表达式的最佳实践
1. 预编译正则表达式
javascript
// Good practice: build the pattern once at the top level and reuse it on
// every call instead of re-creating it.
const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;

/**
 * Checks whether a value looks like an e-mail address.
 *
 * The pattern requires the whole string to be `local@domain.tld`, where
 * none of the three parts contains whitespace or an extra "@".
 *
 * @param {string} email - Candidate address.
 * @returns {boolean} `true` when the whole string matches the pattern.
 */
function validateEmail(email) {
  const isWellFormed = emailRegex.test(email);
  return isWellFormed;
}
// 避免在循环中创建正则表达式
function validateEmails(emails) {
const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/; // 不好的做法
return emails.every(email => emailRegex.test(email));
}
2. 合理使用标志
javascript
// Choose flags according to what the match actually needs.
const multilineText = "line1\nline2\nline3";
// `m` lets ^ match at the start of every line. `g` is required as well:
// without it, match() stops after the first hit and returns only that one.
const lineStart = /^line/gm;
console.log(multilineText.match(lineStart)); // ["line", "line", "line"]
// 不需要时避免使用全局标志
const singleMatch = multilineText.match(/line/); // 只匹配第一个
3. 错误处理
javascript
/**
 * Builds a RegExp without letting an invalid pattern or flag string throw.
 *
 * @param {string} pattern - Regular-expression source text.
 * @param {string} [flags] - Flag string passed through to `RegExp`.
 * @returns {RegExp|null} The compiled expression, or `null` when the
 *   `RegExp` constructor rejected the input (the reason is logged with
 *   `console.error`).
 */
function safeRegex(pattern, flags) {
  let compiled = null;
  try {
    compiled = new RegExp(pattern, flags);
  } catch (err) {
    console.error("无效的正则表达式:", err.message);
  }
  return compiled;
}
const regex = safeRegex("[invalid", "g");
if (regex) {
// 使用正则表达式
}
实际应用示例
1. 表单验证器
javascript
/**
 * Regex-based validator for common form fields.
 *
 * `patterns` maps a pattern name to an anchored regular expression.
 * `validate` checks one value and `validateAll` checks a whole form object.
 */
class FormValidator {
  /** Available validation patterns; the keys are the valid `patternName` values. */
  static patterns = {
    email: /^[^\s@]+@[^\s@]+\.[^\s@]+$/,
    phone: /^\d{3}-\d{3}-\d{4}$/,
    zipCode: /^\d{5}(-\d{4})?$/,
    password: /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)[a-zA-Z\d@$!%*?&]{8,}$/,
    url: /^https?:\/\/[^\s/$.?#].[^\s]*$/i,
    creditCard: /^\d{4}-\d{4}-\d{4}-\d{4}$/,
  };

  /**
   * Tests a single value against a named pattern.
   *
   * @param {string} field - Field name, echoed back in the result.
   * @param {string} value - Value to test.
   * @param {string} patternName - Key of `FormValidator.patterns`.
   * @returns {{isValid: boolean, field: string, value: string, pattern: string}}
   * @throws {Error} When `patternName` is not one of the defined patterns.
   */
  static validate(field, value, patternName) {
    // Own-property lookup only: a plain `patterns[name]` would also find
    // inherited members such as "toString" or "constructor", which are not
    // RegExps and would crash on `.test` instead of reporting an unknown name.
    const pattern = Object.hasOwn(this.patterns, patternName)
      ? this.patterns[patternName]
      : undefined;
    if (!pattern) {
      throw new Error(`未知的验证模式: ${patternName}`);
    }
    return {
      isValid: pattern.test(value),
      field,
      value,
      pattern: patternName,
    };
  }

  /**
   * Validates every entry of a form description.
   *
   * @param {Object<string, {value: string, pattern: string}>} formData -
   *   Field name mapped to the value and the pattern name to check it with.
   * @returns {Array<{isValid: boolean, field: string, value: string, pattern: string}>}
   *   One result per field, in `Object.entries` order.
   * @throws {Error} When any entry names an unknown pattern.
   */
  static validateAll(formData) {
    return Object.entries(formData).map(([field, { value, pattern }]) =>
      this.validate(field, value, pattern),
    );
  }
}
// 使用示例
const formData = {
email: { value: "user@example.com", pattern: "email" },
phone: { value: "123-456-7890", pattern: "phone" },
password: { value: "Password123", pattern: "password" }
};
const validationResults = FormValidator.validateAll(formData);
console.log(validationResults);
2. 文本处理器
javascript
/**
 * Static text utilities built on regular expressions.
 */
class TextProcessor {
  /**
   * Wraps every case-insensitive occurrence of the keywords in an HTML tag.
   *
   * All keywords are combined into one alternation and applied in a single
   * pass, so a keyword can never match inside a tag that was inserted for
   * another keyword (e.g. highlighting both "my" and "mark").
   *
   * @param {string} text - Text to search.
   * @param {string|string[]} keywords - One keyword or a list of keywords.
   *   Regex metacharacters are matched literally; empty strings are ignored.
   * @param {string} [highlightTag="mark"] - Tag name used for the wrapper.
   * @returns {string} The text with matches wrapped in `<tag>…</tag>`.
   */
  static highlight(text, keywords, highlightTag = "mark") {
    const keywordList = Array.isArray(keywords) ? keywords : [keywords];
    const alternatives = keywordList
      // An empty keyword would match at every position in the text.
      .filter((keyword) => keyword !== "")
      // Longest first, so "JavaScript" wins over "Java" at the same position.
      // filter() returned a copy, so the caller's array is not reordered.
      .sort((a, b) => b.length - a.length)
      .map((keyword) => keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
    if (alternatives.length === 0) {
      return text;
    }
    const regex = new RegExp(alternatives.join("|"), "gi");
    // A replacer function keeps `$` sequences in the tag name literal.
    return text.replace(
      regex,
      (matched) => `<${highlightTag}>${matched}</${highlightTag}>`,
    );
  }

  /**
   * Collects every http/https URL found in the text.
   *
   * @param {string} text - Text to scan.
   * @returns {string[]} Matched URLs in order of appearance; empty if none.
   *   A URL runs up to the next whitespace character.
   */
  static extractLinks(text) {
    const urlRegex = /https?:\/\/[^\s/$.?#].[^\s]*/gi;
    return text.match(urlRegex) ?? [];
  }

  /**
   * Removes everything that looks like an HTML tag (`<…>`).
   *
   * NOTE(review): this is plain pattern stripping; do not rely on it as an
   * HTML sanitizer for untrusted input.
   *
   * @param {string} html - Markup to strip.
   * @returns {string} The input without `<…>` sequences.
   */
  static stripHtml(html) {
    return html.replace(/<[^>]*>/g, "");
  }

  /**
   * Rewrites each run of ten consecutive digits as `ddd-ddd-dddd`.
   *
   * @param {string} text - Text containing phone numbers.
   * @returns {string} The text with every 10-digit run reformatted. Longer
   *   digit runs are also affected: their first ten digits are reformatted.
   */
  static formatPhone(text) {
    const phoneRegex = /(\d{3})(\d{3})(\d{4})/g;
    return text.replace(phoneRegex, "$1-$2-$3");
  }

  /**
   * Counts how often each word occurs, ignoring case.
   *
   * Words are runs of `\w` characters (ASCII letters, digits, underscore).
   *
   * @param {string} text - Text to analyse.
   * @returns {Object<string, number>} Lower-cased word mapped to its count.
   */
  static wordFrequency(text) {
    const words = text.toLowerCase().match(/\b\w+\b/g) ?? [];
    // Count in a Map: with a plain object, words such as "constructor" would
    // read an inherited Object.prototype member instead of a missing count.
    const counts = new Map();
    for (const word of words) {
      counts.set(word, (counts.get(word) ?? 0) + 1);
    }
    return Object.fromEntries(counts);
  }
}
// 使用示例
const text = "访问 https://example.com 了解更多信息。联系电话:1234567890";
console.log(TextProcessor.highlight(text, ["访问", "联系"]));
console.log(TextProcessor.extractLinks(text));
console.log(TextProcessor.formatPhone("1234567890"));
console.log(TextProcessor.wordFrequency("hello world hello javascript world"));
3. 日志分析器
javascript
/**
 * Extracts fields from web-server access-log lines and aggregates them.
 *
 * The patterns expect lines shaped like
 * `IP - - [dd/Mon/yyyy:HH:mm:ss +zzzz] "METHOD /path HTTP/x.y" status size "agent"`.
 */
class LogAnalyzer {
  /** Regular expressions used by `parseLogLine`, one per extracted field. */
  static patterns = {
    ipAddress: /(\d{1,3}\.){3}\d{1,3}/,
    // The month abbreviation is mixed case ("Jan"), so both cases are accepted;
    // an upper-case-only class never matches such a timestamp.
    timestamp: /\[(\d{2}\/[A-Za-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4})\]/,
    statusCode: /" (\d{3}) /,
    userAgent: /"([^"]*)"$/,
    request: /"([A-Z]+) ([^"]+) HTTP\/[\d.]+"/,
  };

  /**
   * Parses one log line into its recognised fields.
   *
   * @param {string} logLine - A single access-log line.
   * @returns {{ipAddress?: string, timestamp?: string, statusCode?: number,
   *   method?: string, url?: string, userAgent?: string}} Only the fields
   *   whose pattern matched are present; an unrecognised line yields `{}`.
   */
  static parseLogLine(logLine) {
    const { patterns } = this;
    const result = {};

    const ipMatch = logLine.match(patterns.ipAddress);
    if (ipMatch) {
      result.ipAddress = ipMatch[0];
    }

    const timeMatch = logLine.match(patterns.timestamp);
    if (timeMatch) {
      result.timestamp = timeMatch[1];
    }

    const statusMatch = logLine.match(patterns.statusCode);
    if (statusMatch) {
      result.statusCode = Number.parseInt(statusMatch[1], 10);
    }

    const requestMatch = logLine.match(patterns.request);
    if (requestMatch) {
      result.method = requestMatch[1];
      result.url = requestMatch[2];
    }

    const userAgentMatch = logLine.match(patterns.userAgent);
    if (userAgentMatch) {
      result.userAgent = userAgentMatch[1];
    }

    return result;
  }

  /**
   * Aggregates request statistics over a list of log lines.
   *
   * Lines from which no field could be parsed are not counted.
   *
   * @param {string[]} logLines - Access-log lines.
   * @returns {{totalRequests: number, statusCodes: Object<string, number>,
   *   ipAddresses: Object<string, number>, methods: Object<string, number>,
   *   errors: number}} Counts per status code, IP address and HTTP method;
   *   `errors` is the number of lines with a status code of 400 or above.
   */
  static analyzeLogs(logLines) {
    const analysis = {
      totalRequests: 0,
      statusCodes: {},
      ipAddresses: {},
      methods: {},
      errors: 0,
    };
    // Increments the counter stored under `key` in one of the tally objects.
    const bump = (tally, key) => {
      tally[key] = (tally[key] || 0) + 1;
    };

    for (const line of logLines) {
      const parsed = this.parseLogLine(line);
      if (Object.keys(parsed).length === 0) {
        continue;
      }
      analysis.totalRequests++;

      if (parsed.statusCode) {
        bump(analysis.statusCodes, parsed.statusCode);
        if (parsed.statusCode >= 400) {
          analysis.errors++;
        }
      }
      if (parsed.ipAddress) {
        bump(analysis.ipAddresses, parsed.ipAddress);
      }
      if (parsed.method) {
        bump(analysis.methods, parsed.method);
      }
    }
    return analysis;
  }
}
// 使用示例
const logLines = [
'192.168.1.1 - - [10/Jan/2024:12:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "Mozilla/5.0"',
'192.168.1.2 - - [10/Jan/2024:12:01:00 +0000] "POST /api/users HTTP/1.1" 201 567 "Mozilla/5.0"',
'192.168.1.1 - - [10/Jan/2024:12:02:00 +0000] "GET /about.html HTTP/1.1" 404 0 "Mozilla/5.0"'
];
const analysis = LogAnalyzer.analyzeLogs(logLines);
console.log(analysis);
总结
JavaScript 正则表达式的核心要点:
- 创建方式:字面量语法、RegExp 构造函数
- 标志:g(全局)、i(忽略大小写)、m(多行)、s(dotAll)、u(Unicode)、y(粘性)
- 模式语法:字符类、量词、边界匹配、分组、选择、前瞻后顾
- 方法:test()、exec()、match()、search()、replace()、split()
- 高级特性:Unicode 属性转义、命名捕获组、回溯引用
- 常用模式:表单验证、数据提取、文本处理
- 性能优化:避免回溯灾难、预编译、合理使用标志
- 最佳实践:错误处理、预编译、适当的模式选择
掌握正则表达式是处理复杂文本操作的关键技能。在下一章节中,我们将学习 JavaScript 的错误处理。