File Handling

Overview

File operations are fundamental to Node.js applications. This chapter covers reading, writing, stream processing, file system operations, and best practices for handling files efficiently and safely.

Basic File Operations

Reading Files

javascript
// file-reading.js
const fs = require('fs');
const path = require('path');

// Synchronous file reading (blocking)
function readFileSync(filePath) {
  try {
    const data = fs.readFileSync(filePath, 'utf8');
    console.log('File read synchronously:', data.length, 'characters');
    return data;
  } catch (error) {
    console.error('Sync read error:', error.message);
    throw error;
  }
}

// Asynchronous file reading with callbacks
function readFileCallback(filePath, callback) {
  fs.readFile(filePath, 'utf8', (error, data) => {
    if (error) {
      return callback(error, null);
    }
    console.log('File read with callback:', data.length, 'characters');
    callback(null, data);
  });
}

// Promise-based file reading
function readFilePromise(filePath) {
  return fs.promises.readFile(filePath, 'utf8')
    .then(data => {
      console.log('File read with promise:', data.length, 'characters');
      return data;
    })
    .catch(error => {
      console.error('Promise read error:', error.message);
      throw error;
    });
}

// Async/await file reading
async function readFileAsync(filePath) {
  try {
    const data = await fs.promises.readFile(filePath, 'utf8');
    console.log('File read with async/await:', data.length, 'characters');
    return data;
  } catch (error) {
    console.error('Async read error:', error.message);
    throw error;
  }
}

// Reading binary files
async function readBinaryFile(filePath) {
  try {
    const buffer = await fs.promises.readFile(filePath);
    console.log('Binary file read:', buffer.length, 'bytes');
    return buffer;
  } catch (error) {
    console.error('Binary read error:', error.message);
    throw error;
  }
}

// Reading files in chunks
function readFileInChunks(filePath, chunkSize = 1024) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    const stream = fs.createReadStream(filePath, { 
      encoding: 'utf8',
      highWaterMark: chunkSize 
    });

    stream.on('data', (chunk) => {
      console.log('Chunk received:', chunk.length, 'characters');
      chunks.push(chunk);
    });

    stream.on('end', () => {
      const data = chunks.join('');
      console.log('File read in chunks:', data.length, 'total characters');
      resolve(data);
    });

    stream.on('error', (error) => {
      console.error('Chunk read error:', error.message);
      reject(error);
    });
  });
}

// Usage examples
async function demonstrateReading() {
  const filePath = 'package.json';
  
  try {
    // Different reading methods
    await readFileAsync(filePath);
    await readFilePromise(filePath);
    await readFileInChunks(filePath, 512);
    
    readFileCallback(filePath, (error, data) => {
      if (error) {
        console.error('Callback error:', error.message);
      } else {
        console.log('Callback success');
      }
    });
  } catch (error) {
    console.error('Reading demonstration error:', error.message);
  }
}
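
The functions above always read a file in its entirety. When only part of a large file is needed (for example, a fixed-size header), a FileHandle obtained from fs.promises.open can read a specific byte range. A minimal sketch; readByteRange is a hypothetical helper, not part of the fs API:

javascript
// read-byte-range.js -- sketch; readByteRange is a hypothetical helper
const fs = require('fs');

async function readByteRange(filePath, start, length) {
  let fileHandle;
  try {
    fileHandle = await fs.promises.open(filePath, 'r');
    const buffer = Buffer.alloc(length);
    // Read up to `length` bytes starting at byte offset `start`
    const { bytesRead } = await fileHandle.read(buffer, 0, length, start);
    return buffer.subarray(0, bytesRead);
  } finally {
    await fileHandle?.close(); // always release the file descriptor
  }
}

// Usage: the first 256 bytes of package.json
// readByteRange('package.json', 0, 256).then(buf => console.log(buf.length, 'bytes'));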

Writing Files

javascript
// file-writing.js
const fs = require('fs');
const path = require('path');

// Synchronous file writing
function writeFileSync(filePath, data) {
  try {
    fs.writeFileSync(filePath, data, 'utf8');
    console.log('File written synchronously');
  } catch (error) {
    console.error('Sync write error:', error.message);
    throw error;
  }
}

// Asynchronous file writing
async function writeFileAsync(filePath, data) {
  try {
    await fs.promises.writeFile(filePath, data, 'utf8');
    console.log('File written asynchronously');
  } catch (error) {
    console.error('Async write error:', error.message);
    throw error;
  }
}

// Appending to files
async function appendToFile(filePath, data) {
  try {
    await fs.promises.appendFile(filePath, data, 'utf8');
    console.log('Data appended to file');
  } catch (error) {
    console.error('Append error:', error.message);
    throw error;
  }
}

// Writing JSON data
async function writeJSONFile(filePath, data) {
  try {
    const jsonString = JSON.stringify(data, null, 2);
    await fs.promises.writeFile(filePath, jsonString, 'utf8');
    console.log('JSON file written');
  } catch (error) {
    console.error('JSON write error:', error.message);
    throw error;
  }
}

// Writing with streams (for large files)
function writeFileStream(filePath, data) {
  return new Promise((resolve, reject) => {
    const stream = fs.createWriteStream(filePath, { encoding: 'utf8' });
    
    stream.on('error', (error) => {
      console.error('Stream write error:', error.message);
      reject(error);
    });
    
    stream.on('finish', () => {
      console.log('Stream write completed');
      resolve();
    });
    
    // Write data in chunks if it's large. Note: the boolean returned by
    // write() is ignored here, so no backpressure is applied; see the
    // backpressure-aware sketch at the end of this section
    if (typeof data === 'string' && data.length > 1024 * 1024) {
      const chunkSize = 1024 * 1024; // 1MB chunks
      for (let i = 0; i < data.length; i += chunkSize) {
        const chunk = data.slice(i, i + chunkSize);
        stream.write(chunk);
      }
    } else {
      stream.write(data);
    }
    
    stream.end();
  });
}

// Atomic file writing (write to temp file, then rename). rename() is only
// atomic within the same filesystem, which is why the temp file is created
// next to the target rather than in a system temp directory.
async function writeFileAtomic(filePath, data) {
  const tempPath = filePath + '.tmp';
  
  try {
    await fs.promises.writeFile(tempPath, data, 'utf8');
    await fs.promises.rename(tempPath, filePath);
    console.log('File written atomically');
  } catch (error) {
    // Clean up temp file if it exists
    try {
      await fs.promises.unlink(tempPath);
    } catch (cleanupError) {
      // Ignore cleanup errors
    }
    console.error('Atomic write error:', error.message);
    throw error;
  }
}

// Usage examples
async function demonstrateWriting() {
  const testData = {
    message: 'Hello, World!',
    timestamp: new Date().toISOString(),
    numbers: [1, 2, 3, 4, 5]
  };
  
  try {
    await writeJSONFile('test-output.json', testData);
    await appendToFile('test-log.txt', `Log entry: ${new Date()}\n`);
    await writeFileAtomic('atomic-test.txt', 'This was written atomically');
    await writeFileStream('stream-test.txt', 'This was written with streams');
  } catch (error) {
    console.error('Writing demonstration error:', error.message);
  }
}
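
Note that writeFileStream above ignores the boolean returned by stream.write(), so large data is buffered in memory rather than throttled. A backpressure-aware variant waits for the 'drain' event whenever write() returns false. A minimal sketch, assuming the data arrives as an array of string chunks:

javascript
// write-backpressure.js -- sketch: honoring backpressure via the 'drain' event
const fs = require('fs');

function writeChunksWithBackpressure(filePath, chunks) {
  return new Promise((resolve, reject) => {
    const stream = fs.createWriteStream(filePath, { encoding: 'utf8' });
    let index = 0;

    stream.on('error', reject);
    stream.on('finish', resolve);

    function writeNext() {
      while (index < chunks.length) {
        // write() returns false when the internal buffer is full
        if (!stream.write(chunks[index++])) {
          stream.once('drain', writeNext); // resume once the buffer drains
          return;
        }
      }
      stream.end();
    }

    writeNext();
  });
}

// Usage: writeChunksWithBackpressure('big-output.txt', ['part one\n', 'part two\n']);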

File System Operations

Directory Operations

javascript
// directory-operations.js
const fs = require('fs');
const path = require('path');

class DirectoryManager {
  // Create directory (recursive)
  async createDirectory(dirPath) {
    try {
      await fs.promises.mkdir(dirPath, { recursive: true });
      console.log('Directory created:', dirPath);
    } catch (error) {
      console.error('Create directory error:', error.message);
      throw error;
    }
  }

  // List directory contents
  async listDirectory(dirPath, options = {}) {
    try {
      const items = await fs.promises.readdir(dirPath, { withFileTypes: true });
      
      const result = {
        files: [],
        directories: [],
        total: items.length
      };

      for (const item of items) {
        const itemPath = path.join(dirPath, item.name);
        const stats = await fs.promises.stat(itemPath);
        
        const itemInfo = {
          name: item.name,
          path: itemPath,
          size: stats.size,
          created: stats.birthtime,
          modified: stats.mtime,
          isDirectory: item.isDirectory(),
          isFile: item.isFile()
        };

        if (item.isDirectory()) {
          result.directories.push(itemInfo);
        } else {
          result.files.push(itemInfo);
        }
      }

      if (options.sortBy) {
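        // Numeric comparison: valid for size and Date fields (e.g. modified),
        // not for string fields such as name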
        const sortFn = (a, b) => {
          const aValue = a[options.sortBy];
          const bValue = b[options.sortBy];
          return options.sortOrder === 'desc' ? bValue - aValue : aValue - bValue;
        };
        
        result.files.sort(sortFn);
        result.directories.sort(sortFn);
      }

      return result;
    } catch (error) {
      console.error('List directory error:', error.message);
      throw error;
    }
  }

  // Copy directory recursively
  async copyDirectory(source, destination) {
    try {
      await this.createDirectory(destination);
      const items = await fs.promises.readdir(source, { withFileTypes: true });

      for (const item of items) {
        const sourcePath = path.join(source, item.name);
        const destPath = path.join(destination, item.name);

        if (item.isDirectory()) {
          await this.copyDirectory(sourcePath, destPath);
        } else {
          await fs.promises.copyFile(sourcePath, destPath);
        }
      }

      console.log('Directory copied:', source, '->', destination);
    } catch (error) {
      console.error('Copy directory error:', error.message);
      throw error;
    }
  }

  // Remove directory recursively
  async removeDirectory(dirPath) {
    try {
      await fs.promises.rm(dirPath, { recursive: true, force: true });
      console.log('Directory removed:', dirPath);
    } catch (error) {
      console.error('Remove directory error:', error.message);
      throw error;
    }
  }

  // Get directory size
  async getDirectorySize(dirPath) {
    try {
      let totalSize = 0;
      const items = await fs.promises.readdir(dirPath, { withFileTypes: true });

      for (const item of items) {
        const itemPath = path.join(dirPath, item.name);
        
        if (item.isDirectory()) {
          totalSize += await this.getDirectorySize(itemPath);
        } else {
          const stats = await fs.promises.stat(itemPath);
          totalSize += stats.size;
        }
      }

      return totalSize;
    } catch (error) {
      console.error('Get directory size error:', error.message);
      throw error;
    }
  }

  // Watch directory for changes
  watchDirectory(dirPath, callback) {
    try {
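      // Note: { recursive: true } is supported on Windows and macOS; on Linux
      // it requires Node.js v20 or later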
      const watcher = fs.watch(dirPath, { recursive: true }, (eventType, filename) => {
        callback({
          event: eventType,
          filename,
          path: path.join(dirPath, filename || ''),
          timestamp: new Date()
        });
      });

      console.log('Watching directory:', dirPath);
      return watcher;
    } catch (error) {
      console.error('Watch directory error:', error.message);
      throw error;
    }
  }
}

// Usage example
async function demonstrateDirectoryOperations() {
  const dirManager = new DirectoryManager();
  
  try {
    await dirManager.createDirectory('test-dir/sub-dir');
    
    const contents = await dirManager.listDirectory('.', { sortBy: 'modified', sortOrder: 'desc' });
    console.log('Directory contents:', contents);
    
    const size = await dirManager.getDirectorySize('.');
    console.log('Directory size:', size, 'bytes');
    
    // Watch for changes
    const watcher = dirManager.watchDirectory('.', (change) => {
      console.log('Directory change:', change);
    });
    
    // Stop watching after 10 seconds
    setTimeout(() => {
      watcher.close();
      console.log('Stopped watching directory');
    }, 10000);
    
  } catch (error) {
    console.error('Directory operations error:', error.message);
  }
}

module.exports = DirectoryManager;
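
listDirectory above materializes an entire listing in memory. For very large directories, fs.promises.opendir returns an async iterator that yields one entry at a time, which pairs naturally with an async generator. A minimal sketch of a recursive walker; walkDirectory is a hypothetical helper:

javascript
// directory-walk.js -- sketch; walkDirectory is a hypothetical helper
const fs = require('fs');
const path = require('path');

// Async generator: yields file paths one at a time instead of building an array
async function* walkDirectory(dirPath) {
  const dir = await fs.promises.opendir(dirPath);
  for await (const entry of dir) {
    const entryPath = path.join(dirPath, entry.name);
    if (entry.isDirectory()) {
      yield* walkDirectory(entryPath);
    } else {
      yield entryPath;
    }
  }
}

// Usage: print every file under the current directory
// (async () => { for await (const p of walkDirectory('.')) console.log(p); })();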

File Streams

Advanced Stream Operations

javascript
// file-streaming.js
const fs = require('fs');
const { pipeline, Transform } = require('stream');
const { promisify } = require('util');
const pipelineAsync = promisify(pipeline);

// Custom transform streams
class LineCounter extends Transform {
  constructor() {
    super(); // plain byte pass-through; object mode is not needed here
    this.lineCount = 0;
  }

  _transform(chunk, encoding, callback) {
    const lines = chunk.toString().split('\n');
    this.lineCount += lines.length - 1; // counts newline characters, so a line split across chunks is counted exactly once
    callback(null, chunk);
  }

  _flush(callback) {
    console.log('Total lines processed:', this.lineCount);
    callback();
  }
}

class DataProcessor extends Transform {
  constructor(processFn) {
    super();
    this.processFn = processFn;
  }

  _transform(chunk, encoding, callback) {
    try {
      const processed = this.processFn(chunk);
      callback(null, processed);
    } catch (error) {
      callback(error);
    }
  }
}

// File processing with streams
async function processLargeFile(inputPath, outputPath, processor) {
  try {
    const readStream = fs.createReadStream(inputPath);
    const writeStream = fs.createWriteStream(outputPath);
    const lineCounter = new LineCounter();
    const dataProcessor = new DataProcessor(processor);

    await pipelineAsync(
      readStream,
      lineCounter,
      dataProcessor,
      writeStream
    );

    console.log('File processing completed');
  } catch (error) {
    console.error('Stream processing error:', error.message);
    throw error;
  }
}

// CSV file processing
class CSVProcessor extends Transform {
  constructor(options = {}) {
    super({ objectMode: true });
    this.headers = null;
    this.delimiter = options.delimiter || ',';
    this.skipHeader = options.skipHeader || false;
    this.rowCount = 0;
    this.remainder = ''; // partial line carried over between chunks
  }

  _transform(chunk, encoding, callback) {
    // Prepend any partial line left over from the previous chunk; the last
    // element after splitting may itself be incomplete, so carry it forward
    const lines = (this.remainder + chunk.toString()).split('\n');
    this.remainder = lines.pop();

    for (const line of lines) {
      this.processLine(line);
    }

    callback();
  }

  processLine(line) {
    if (!line.trim()) return;

    const values = line.split(this.delimiter);

    if (!this.headers && !this.skipHeader) {
      this.headers = values.map(h => h.trim());
      return;
    }

    if (!this.headers) {
      this.headers = values.map((_, i) => `column_${i}`);
    }

    const row = {};
    this.headers.forEach((header, index) => {
      row[header] = values[index]?.trim() || '';
    });

    this.rowCount++;
    this.push(JSON.stringify(row) + '\n');
  }

  _flush(callback) {
    this.processLine(this.remainder); // handle a final line with no trailing newline
    console.log('CSV processing completed. Rows processed:', this.rowCount);
    callback();
  }
}

// Usage examples
async function demonstrateStreaming() {
  try {
    // Process text file (convert to uppercase)
    await processLargeFile(
      'input.txt',
      'output.txt',
      (chunk) => chunk.toString().toUpperCase()
    );

    // Process CSV file
    const csvProcessor = new CSVProcessor({ delimiter: ',' });
    const csvInput = fs.createReadStream('data.csv');
    const jsonOutput = fs.createWriteStream('data.json');

    await pipelineAsync(csvInput, csvProcessor, jsonOutput);
    
  } catch (error) {
    console.error('Streaming demonstration error:', error.message);
  }
}

module.exports = { processLargeFile, CSVProcessor, LineCounter, DataProcessor };
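
The same pipeline pattern composes with Node's built-in zlib transform streams. A minimal sketch that gzip-compresses a file; the file names are placeholders:

javascript
// gzip-file.js -- sketch: compressing a file with a built-in zlib transform
const fs = require('fs');
const zlib = require('zlib');
const { pipeline } = require('stream');
const { promisify } = require('util');
const pipelineAsync = promisify(pipeline);

async function gzipFile(inputPath, outputPath) {
  await pipelineAsync(
    fs.createReadStream(inputPath),
    zlib.createGzip(), // transform stream: compresses bytes as they flow through
    fs.createWriteStream(outputPath)
  );
  console.log('Compressed', inputPath, '->', outputPath);
}

// Usage: gzipFile('input.txt', 'input.txt.gz');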

File Security and Validation

Secure File Operations

javascript
// file-security.js
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');

class SecureFileHandler {
  constructor(options = {}) {
    this.allowedExtensions = options.allowedExtensions || ['.txt', '.json', '.csv'];
    this.maxFileSize = options.maxFileSize || 10 * 1024 * 1024; // 10MB
    this.uploadDir = options.uploadDir || './uploads';
    this.quarantineDir = options.quarantineDir || './quarantine';
  }

  // Validate file path (prevent directory traversal)
  validatePath(filePath) {
    const normalizedPath = path.normalize(filePath);
    const resolvedPath = path.resolve(normalizedPath);
    const allowedDir = path.resolve(this.uploadDir);

    // Compare against the directory plus a separator so that a sibling such as
    // "/uploads-evil" does not pass a prefix check against "/uploads"
    if (resolvedPath !== allowedDir && !resolvedPath.startsWith(allowedDir + path.sep)) {
      throw new Error('Invalid file path: Directory traversal detected');
    }

    return resolvedPath;
  }

  // Validate file extension
  validateExtension(filename) {
    const ext = path.extname(filename).toLowerCase();
    
    if (!this.allowedExtensions.includes(ext)) {
      throw new Error(`Invalid file extension: ${ext}. Allowed: ${this.allowedExtensions.join(', ')}`);
    }

    return true;
  }

  // Validate file size
  async validateFileSize(filePath) {
    try {
      const stats = await fs.promises.stat(filePath);
      
      if (stats.size > this.maxFileSize) {
        throw new Error(`File too large: ${stats.size} bytes. Max allowed: ${this.maxFileSize} bytes`);
      }

      return stats.size;
    } catch (error) {
      throw new Error(`Cannot validate file size: ${error.message}`);
    }
  }

  // Generate secure filename
  generateSecureFilename(originalName) {
    const ext = path.extname(originalName);
    const timestamp = Date.now();
    const random = crypto.randomBytes(8).toString('hex');
    
    return `${timestamp}_${random}${ext}`;
  }

  // Calculate file hash
  async calculateFileHash(filePath, algorithm = 'sha256') {
    return new Promise((resolve, reject) => {
      const hash = crypto.createHash(algorithm);
      const stream = fs.createReadStream(filePath);

      stream.on('data', (data) => {
        hash.update(data);
      });

      stream.on('end', () => {
        resolve(hash.digest('hex'));
      });

      stream.on('error', (error) => {
        reject(error);
      });
    });
  }

  // Scan file content for malicious patterns
  async scanFileContent(filePath) {
    try {
      const content = await fs.promises.readFile(filePath, 'utf8');
      
      // Basic malicious pattern detection
      const maliciousPatterns = [
        /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
        /javascript:/gi,
        /vbscript:/gi,
        /onload\s*=/gi,
        /onerror\s*=/gi
      ];

      for (const pattern of maliciousPatterns) {
        if (pattern.test(content)) {
          throw new Error('Malicious content detected');
        }
      }

      return true;
    } catch (error) {
      if (error.message === 'Malicious content detected') {
        throw error;
      }
      // Note: reading binary data as UTF-8 does not throw, so this basic scan
      // is only meaningful for text files; other read errors are not treated
      // as malicious here
      return true;
    }
  }

  // Secure file upload
  async secureUpload(sourceFile, originalName) {
    try {
      // Validate extension
      this.validateExtension(originalName);

      // Generate secure filename
      const secureFilename = this.generateSecureFilename(originalName);
      const targetPath = path.join(this.uploadDir, secureFilename);

      // Ensure upload directory exists
      await fs.promises.mkdir(this.uploadDir, { recursive: true });

      // Copy file to secure location
      await fs.promises.copyFile(sourceFile, targetPath);

      // Validate file size
      const fileSize = await this.validateFileSize(targetPath);

      // Scan for malicious content
      await this.scanFileContent(targetPath);

      // Calculate file hash for integrity
      const fileHash = await this.calculateFileHash(targetPath);

      console.log('File uploaded securely:', {
        originalName,
        secureFilename,
        size: fileSize,
        hash: fileHash
      });

      return {
        filename: secureFilename,
        path: targetPath,
        size: fileSize,
        hash: fileHash,
        uploadedAt: new Date()
      };

    } catch (error) {
      // Move suspicious files to quarantine
      if (error.message.includes('Malicious content')) {
        await this.quarantineFile(sourceFile, originalName);
      }
      
      console.error('Secure upload error:', error.message);
      throw error;
    }
  }

  // Quarantine suspicious files
  async quarantineFile(filePath, originalName) {
    try {
      await fs.promises.mkdir(this.quarantineDir, { recursive: true });
      
      const quarantineFilename = `${Date.now()}_${originalName}`;
      const quarantinePath = path.join(this.quarantineDir, quarantineFilename);
      
      await fs.promises.copyFile(filePath, quarantinePath);
      
      console.log('File quarantined:', quarantineFilename);
    } catch (error) {
      console.error('Quarantine error:', error.message);
    }
  }
}

module.exports = SecureFileHandler;
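
A brief usage sketch tying the class together; the source path and file name here are hypothetical:

javascript
// Usage sketch (the source path and file name are hypothetical)
const SecureFileHandler = require('./file-security');

async function demonstrateSecureUpload() {
  const handler = new SecureFileHandler({
    allowedExtensions: ['.txt', '.json'],
    maxFileSize: 5 * 1024 * 1024, // 5MB
    uploadDir: './uploads'
  });

  try {
    const result = await handler.secureUpload('/tmp/incoming-file', 'report.json');
    console.log('Upload accepted:', result.filename, result.size, result.hash);
  } catch (error) {
    console.error('Upload rejected:', error.message);
  }
}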

Next Steps

In the next chapter, we will explore advanced Node.js features, including clustering, worker threads, and performance optimization.

Key Takeaways

  • Use asynchronous file operations to avoid blocking the event loop
  • Streams are efficient for processing large files
  • Always validate file paths to prevent security vulnerabilities
  • Implement proper error handling for file operations
  • Use atomic operations for critical file writes
  • Monitor file system operations for performance optimization (see the sketch below)
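
For the last point, Node's built-in perf_hooks module offers one simple starting point for timing individual file operations. A minimal sketch; timedReadFile is a hypothetical helper:

javascript
// fs-timing.js -- sketch: timing a single file operation with perf_hooks
const fs = require('fs');
const { performance } = require('perf_hooks');

async function timedReadFile(filePath) {
  const start = performance.now();
  const data = await fs.promises.readFile(filePath, 'utf8');
  const elapsed = performance.now() - start;
  console.log(`readFile(${filePath}) took ${elapsed.toFixed(2)} ms`);
  return data;
}

// Usage: timedReadFile('package.json');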
