跳到主要内容

文件操作详解

学会读写文件,处理数据持久化

文件操作概述

为什么需要文件操作

  • 数据持久化:程序结束后保存数据
  • 配置管理:读取配置文件
  • 日志记录:保存运行日志
  • 数据交换:与其他程序交换数据

文件操作流程

# 1. 打开文件
# 2. 读取/写入
# 3. 关闭文件

# Traditional approach: the caller must close the file explicitly.
# If read() raises, close() is never reached and the file stays open.
file = open("test.txt", "r")
content = file.read()
file.close()

# Recommended approach: the with statement
with open("test.txt", "r") as file:
    content = file.read()
# The file is closed automatically when the with block exits

打开文件

open()函数

open(file, mode='r', encoding=None)

文件模式

| 模式 | 说明 |
| --- | --- |
| 'r' | 只读(默认),文件必须存在 |
| 'w' | 只写,会覆盖原内容,不存在则创建 |
| 'a' | 追加,在末尾添加内容 |
| 'x' | 创建新文件,已存在则报错 |
| 'r+' | 读写,文件必须存在 |
| 'w+' | 读写,会清空原内容 |
| 'a+' | 读写追加 |
| 'b' | 二进制模式(如'rb', 'wb') |

编码

# Passing the encoding explicitly is recommended (especially for Chinese text)
with open("test.txt", "r", encoding="utf-8") as f:
    content = f.read()

读取文件

read() - 读取全部

# Read the whole file into one string
with open("test.txt", "r", encoding="utf-8") as f:
    content = f.read()
    print(content)

# Read a limited number of characters
with open("test.txt", "r", encoding="utf-8") as f:
    first_10 = f.read(10)  # at most the first 10 characters

readline() - 读取一行

with open("test.txt", "r", encoding="utf-8") as f:
    line1 = f.readline()  # first line, including its trailing newline
    line2 = f.readline()  # second line ("" once the end of file is reached)

readlines() - 读取所有行

with open("test.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()  # returns a list with one string per line
    for line in lines:
        print(line.strip())  # strip() removes the newline and surrounding whitespace

逐行迭代(推荐)

# Memory-friendly: lines are read one at a time, which suits large files
with open("test.txt", "r", encoding="utf-8") as f:
    for line in f:
        print(line.strip())

写入文件

write() - 写入字符串

# Overwrite: mode "w" truncates the file before writing
with open("output.txt", "w", encoding="utf-8") as f:
    f.write("Hello, World!\n")
    f.write("你好,世界!\n")

writelines() - 写入多行

# writelines() does not add line separators, so each item carries its own "\n"
lines = ["第一行\n", "第二行\n", "第三行\n"]
with open("output.txt", "w", encoding="utf-8") as f:
    f.writelines(lines)

追加写入

# Mode "a" keeps the existing content and writes at the end of the file
with open("log.txt", "a", encoding="utf-8") as f:
    f.write("新的日志记录\n")

文件指针

with open("test.txt", "r", encoding="utf-8") as f:
    # tell() - report the current position in the file
    print(f.tell())  # 0

    content = f.read(5)
    # In text mode tell() returns an opaque position rather than a character
    # count, so this prints 5 only when the first five characters are
    # single-byte (ASCII) characters.
    print(f.tell())  # 5 for ASCII-only content

    # seek(offset, whence) - move the file position
    # whence: 0=start, 1=current, 2=end. Text files only accept offsets
    # relative to the start, plus seek(0, 1) and seek(0, 2).
    f.seek(0)  # back to the beginning
    print(f.tell())  # 0

文件与目录操作

os模块

import os

# Current working directory
print(os.getcwd())

# Change the working directory (relative paths below resolve against it)
os.chdir("/path/to/dir")

# List the entries of a directory
print(os.listdir("."))

# Create directories
os.mkdir("new_folder")
os.makedirs("path/to/nested/folder") # also creates missing parent directories

# Delete
os.remove("file.txt") # delete a file
os.rmdir("empty_folder") # delete an empty directory

# Rename
os.rename("old.txt", "new.txt")

# Existence and type checks
print(os.path.exists("test.txt"))
print(os.path.isfile("test.txt"))
print(os.path.isdir("folder"))

os.path模块

import os.path

path = "/Users/name/Documents/test.txt"

# Splitting a path into its parts
print(os.path.dirname(path)) # /Users/name/Documents
print(os.path.basename(path)) # test.txt
print(os.path.split(path)) # ('/Users/name/Documents', 'test.txt')
print(os.path.splitext(path)) # ('/Users/name/Documents/test', '.txt')

# Joining path components with the platform's separator
new_path = os.path.join("folder", "subfolder", "file.txt")
print(new_path) # folder/subfolder/file.txt (backslashes on Windows)

# Absolute path, resolved against the current working directory
print(os.path.abspath("test.txt"))

# File information
print(os.path.getsize("test.txt")) # file size in bytes

pathlib模块(推荐)

from pathlib import Path

# Create a Path object
p = Path("folder/test.txt")

# Path components
print(p.parent)  # folder
print(p.name)  # test.txt
print(p.stem)  # test
print(p.suffix)  # .txt

# Join path components with the / operator
new_path = Path("folder") / "subfolder" / "file.txt"

# Existence and type checks
print(p.exists())
print(p.is_file())
print(p.is_dir())

# Iterate over matching entries in one directory
for file in Path(".").glob("*.py"):
    print(file)

# Iterate recursively through subdirectories
for file in Path(".").rglob("*.py"):
    print(file)

# Read and write whole files in one call
content = Path("test.txt").read_text(encoding="utf-8")
Path("output.txt").write_text("Hello", encoding="utf-8")

常用文件格式

JSON文件

import json

# Write JSON; ensure_ascii=False keeps non-ASCII text readable in the file
data = {"name": "张三", "age": 25, "skills": ["Python", "JavaScript"]}
with open("data.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# Read JSON
with open("data.json", "r", encoding="utf-8") as f:
    data = json.load(f)
    print(data["name"])

# Convert to and from strings
json_str = json.dumps(data, ensure_ascii=False)
data = json.loads(json_str)

CSV文件

import csv

# Write CSV
data = [
    ["姓名", "年龄", "城市"],
    ["张三", 25, "北京"],
    ["李四", 30, "上海"],
]
# newline="" lets the csv module control line endings itself
with open("data.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(data)

# Read CSV: each row is a list of strings.
# newline="" is needed on reads too, so newlines embedded in quoted fields
# are interpreted correctly.
with open("data.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

# Read as dictionaries: the first row supplies the keys
with open("data.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row["姓名"], row["年龄"])

实战示例

示例1:日志记录

from datetime import datetime

def log(message, filename="app.log"):
    """Append a timestamped entry to a log file.

    Each entry is written as ``[YYYY-MM-DD HH:MM:SS] message`` followed by a
    newline, using the local time at the moment of the call.

    Args:
        message: Text of the log entry.
        filename: Path of the log file. It is created if it does not exist,
            and existing content is kept.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"[{timestamp}] {message}\n"

    # Mode "a" appends, so earlier entries are never overwritten.
    with open(filename, "a", encoding="utf-8") as f:
        f.write(log_entry)

# Example usage: each call appends one entry to the default file, app.log
log("程序启动")
log("处理数据...")
log("程序结束")

示例2:配置文件

import json

def load_config(filepath="config.json"):
    """Load configuration from a JSON file.

    Args:
        filepath: Path of the JSON configuration file.

    Returns:
        The parsed JSON content, or an empty dict when the file does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON. This
            is deliberately not swallowed, so a corrupted configuration is
            not silently replaced by an empty one.
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # A missing file means nothing has been saved yet.
        return {}

def save_config(config, filepath="config.json"):
    """Write configuration to a JSON file, replacing any existing content.

    The output is indented by two spaces, and non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes.

    Args:
        config: JSON-serializable configuration data.
        filepath: Path of the file to write.
    """
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(config, f, ensure_ascii=False, indent=2)

# Usage: load the configuration, change one setting, and write it back
config = load_config()
config["theme"] = "dark"
save_config(config)

示例3:批量文件处理

from pathlib import Path

def batch_rename(folder, old_ext, new_ext):
    """Change the extension of every matching file directly inside a folder.

    Only regular files are renamed; subdirectories are not searched, and a
    directory whose name happens to end with ``old_ext`` is left alone.
    One line is printed per renamed file.

    Args:
        folder: Directory to process (a string or a Path).
        old_ext: Extension to replace, including the dot, e.g. ".jpeg".
        new_ext: Replacement extension, including the dot, e.g. ".jpg".
    """
    folder = Path(folder)
    # Snapshot the matches first so that renaming does not change the
    # directory listing while it is being iterated.
    candidates = sorted(folder.glob(f"*{old_ext}"))
    for file in candidates:
        if not file.is_file():
            continue
        new_name = file.with_suffix(new_ext)
        file.rename(new_name)
        print(f"重命名:{file.name} -> {new_name.name}")

# batch_rename("./images", ".jpeg", ".jpg")

练习

练习1:统计文件

# 统计文件的行数、单词数、字符数
def count_file(filepath):
    """Count the lines, words, and characters of a UTF-8 text file.

    Args:
        filepath: Path of the file to analyse.

    Returns:
        A tuple ``(lines, words, chars)``. Words are whitespace-separated
        runs, and characters are counted after decoding, newlines included.
        An empty file yields ``(0, 0, 0)``.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    # Count newline terminators, plus one for a final line without one.
    # Splitting on "\n" would report an extra empty line for files that end
    # with a newline, and one line for an empty file.
    lines = content.count("\n")
    if content and not content.endswith("\n"):
        lines += 1

    words = len(content.split())
    chars = len(content)
    return lines, words, chars

# Example usage: count test.txt and print the three totals
lines, words, chars = count_file("test.txt")
print(f"行数:{lines},单词数:{words},字符数:{chars}")

本章小结

  1. open()函数:打开文件,指定模式和编码
  2. with语句:自动关闭文件,推荐使用
  3. 读取方法:read、readline、readlines
  4. 写入方法:write、writelines
  5. 路径处理:os.path 或 pathlib
  6. 常用格式:JSON、CSV

下一步

学会了文件操作,下一章学习模块与包管理。

→ 继续阅读:24-模块与包管理