文件操作详解
学会读写文件,处理数据持久化
文件操作概述
为什么需要文件操作
- 数据持久化:程序结束后保存数据
- 配置管理:读取配置文件
- 日志记录:保存运行日志
- 数据交换:与其他程序交换数据
文件操作流程
# 1. 打开文件
# 2. 读取/写入
# 3. 关闭文件
# 传统方式
file = open("test.txt", "r")
content = file.read()
file.close()
# 推荐方式(with语句)
with open("test.txt", "r") as file:
content = file.read()
# 自动关闭
打开文件
open()函数
open(file, mode='r', encoding=None)
文件模式
| 模式 | 说明 |
|---|---|
| 'r' | 只读(默认),文件必须存在 |
| 'w' | 只写,会覆盖原内容,不存在则创建 |
| 'a' | 追加,在末尾添加内容 |
| 'x' | 创建新文件,已存在则报错 |
| 'r+' | 读写 |
| 'w+' | 读写,会清空原内容 |
| 'a+' | 读写追加 |
| 'b' | 二进制模式(如'rb', 'wb') |
编码
# 推荐指定编码(尤其是中文)
with open("test.txt", "r", encoding="utf-8") as f:
content = f.read()
读取文件
read() - 读取全部
with open("test.txt", "r", encoding="utf-8") as f:
content = f.read()
print(content)
# 读取指定字符数
with open("test.txt", "r", encoding="utf-8") as f:
first_10 = f.read(10) # 读取前10个字符
readline() - 读取一行
with open("test.txt", "r", encoding="utf-8") as f:
line1 = f.readline() # 读取第一行
line2 = f.readline() # 读取第二行
readlines() - 读取所有行
with open("test.txt", "r", encoding="utf-8") as f:
lines = f.readlines() # 返回列表
for line in lines:
print(line.strip()) # strip()去除换行符
逐行迭代(推荐)
# 内存友好,适合大文件
with open("test.txt", "r", encoding="utf-8") as f:
for line in f:
print(line.strip())
写入文件
write() - 写入字符串
# 覆盖写入
with open("output.txt", "w", encoding="utf-8") as f:
f.write("Hello, World!\n")
f.write("你好,世界!\n")
writelines() - 写入多行
lines = ["第一行\n", "第二行\n", "第三行\n"]
with open("output.txt", "w", encoding="utf-8") as f:
f.writelines(lines)
追加写入
with open("log.txt", "a", encoding="utf-8") as f:
f.write("新的日志记录\n")
文件指针
with open("test.txt", "r") as f:
# tell() - 获取当前位置
print(f.tell()) # 0
content = f.read(5)
print(f.tell()) # 5
# seek(offset, whence) - 移动指针
# whence: 0=开头, 1=当前, 2=结尾
f.seek(0) # 回到开头
print(f.tell()) # 0
文件与目录操作
os模块
import os
# 当前工作目录
print(os.getcwd())
# 切换目录
os.chdir("/path/to/dir")
# 列出目录内容
print(os.listdir("."))
# 创建目录
os.mkdir("new_folder")
os.makedirs("path/to/nested/folder") # 递归创建
# 删除
os.remove("file.txt") # 删除文件
os.rmdir("empty_folder") # 删除空目录
# 重命名
os.rename("old.txt", "new.txt")
# 检查存在
print(os.path.exists("test.txt"))
print(os.path.isfile("test.txt"))
print(os.path.isdir("folder"))
os.path模块
import os.path
path = "/Users/name/Documents/test.txt"
# 路径操作
print(os.path.dirname(path)) # /Users/name/Documents
print(os.path.basename(path)) # test.txt
print(os.path.split(path)) # ('/Users/name/Documents', 'test.txt')
print(os.path.splitext(path)) # ('/Users/name/Documents/test', '.txt')
# 路径拼接
new_path = os.path.join("folder", "subfolder", "file.txt")
print(new_path) # folder/subfolder/file.txt
# 获取绝对路径
print(os.path.abspath("test.txt"))
# 文件信息
print(os.path.getsize("test.txt")) # 文件大小(字节)
pathlib模块(推荐)
from pathlib import Path
# 创建Path对象
p = Path("folder/test.txt")
# 路径操作
print(p.parent) # folder
print(p.name) # test.txt
print(p.stem) # test
print(p.suffix) # .txt
# 拼接路径
new_path = Path("folder") / "subfolder" / "file.txt"
# 检查
print(p.exists())
print(p.is_file())
print(p.is_dir())
# 遍历目录
for file in Path(".").glob("*.py"):
print(file)
# 递归遍历
for file in Path(".").rglob("*.py"):
print(file)
# 读写文件
content = Path("test.txt").read_text(encoding="utf-8")
Path("output.txt").write_text("Hello", encoding="utf-8")
常用文件格式
JSON文件
import json
# 写入JSON
data = {"name": "张三", "age": 25, "skills": ["Python", "JavaScript"]}
with open("data.json", "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
# 读取JSON
with open("data.json", "r", encoding="utf-8") as f:
data = json.load(f)
print(data["name"])
# 字符串转换
json_str = json.dumps(data, ensure_ascii=False)
data = json.loads(json_str)
CSV文件
import csv
# 写入CSV
data = [
["姓名", "年龄", "城市"],
["张三", 25, "北京"],
["李四", 30, "上海"]
]
with open("data.csv", "w", newline="", encoding="utf-8") as f:
writer = csv.writer(f)
writer.writerows(data)
# Read the CSV file row by row; each row is a list of strings.
# newline="" hands line-ending handling to the csv module, so newlines
# embedded inside quoted fields are read back correctly.
with open("data.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)
# Read the same file as dicts keyed by the header row.
with open("data.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row["姓名"], row["年龄"])
实战示例
示例1:日志记录
from datetime import datetime
def log(message, filename="app.log"):
    """Append ``message`` to ``filename`` as one timestamped line.

    Args:
        message: Text to record.
        filename: Path of the log file; it is opened in append mode, so
            existing entries are kept.
    """
    with open(filename, "a", encoding="utf-8") as log_file:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log_file.write(f"[{timestamp}] {message}\n")
log("程序启动")
log("处理数据...")
log("程序结束")
示例2:配置文件
import json
def load_config(filepath="config.json"):
    """Load the JSON configuration stored at ``filepath``.

    Args:
        filepath: Path of the JSON file to read (UTF-8).

    Returns:
        The decoded JSON value, or an empty dict when the file does not
        exist or contains only whitespace.

    Raises:
        json.JSONDecodeError: If the file has content that is not valid JSON.
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            text = f.read()
    except FileNotFoundError:
        return {}
    # An empty file (for example one left behind by an interrupted write)
    # is treated like a missing one instead of raising JSONDecodeError.
    if not text.strip():
        return {}
    return json.loads(text)
def save_config(config, filepath="config.json"):
    """Write ``config`` to ``filepath`` as indented, UTF-8 encoded JSON.

    Args:
        config: JSON-serializable value to store.
        filepath: Destination file; it is created or overwritten.

    Raises:
        TypeError: If ``config`` contains a value json cannot serialize.
            The existing file is left untouched in that case.
    """
    # Serialize before opening the file: opening with "w" truncates it, so
    # encoding first guarantees a serialization error cannot wipe out the
    # previously saved configuration.
    text = json.dumps(config, ensure_ascii=False, indent=2)
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(text)
# 使用
config = load_config()
config["theme"] = "dark"
save_config(config)
示例3:批量文件处理
from pathlib import Path
def batch_rename(folder, old_ext, new_ext):
    """Change the extension of every regular file in ``folder``.

    Args:
        folder: Directory to scan (str or path-like); subdirectories are
            not searched.
        old_ext: Extension to match, including the dot, e.g. ".jpeg".
        new_ext: Replacement extension, including the dot, e.g. ".jpg".

    Directories whose names happen to match are left alone, and a file is
    skipped when a different file already has the target name, so nothing
    is overwritten.
    """
    folder = Path(folder)
    # Snapshot the matches before renaming: changing a directory while
    # lazily iterating over it may yield freshly renamed entries again.
    for file in sorted(folder.glob(f"*{old_ext}")):
        if not file.is_file():
            continue
        new_name = file.with_suffix(new_ext)
        # samefile() keeps case-only renames working on case-insensitive
        # file systems, where the target "exists" but is the source itself.
        if new_name.exists() and not new_name.samefile(file):
            print(f"跳过(目标已存在):{file.name} -> {new_name.name}")
            continue
        file.rename(new_name)
        print(f"重命名:{file.name} -> {new_name.name}")
# batch_rename("./images", ".jpeg", ".jpg")
练习
练习1:统计文件
# 统计文件的行数、单词数、字符数
def count_file(filepath):
    """Count lines, whitespace-separated words and characters in a text file.

    Args:
        filepath: Path of a UTF-8 encoded text file.

    Returns:
        A ``(lines, words, chars)`` tuple of ints. An empty file has zero
        lines, and a trailing newline does not add an extra line.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()
    # Count newline terminators, plus one for a final line that has no
    # trailing newline. Splitting on the newline character instead reports
    # 1 line for an empty file and one line too many for files that end
    # with a newline.
    lines = content.count("\n")
    if content and not content.endswith("\n"):
        lines += 1
    return lines, len(content.split()), len(content)
lines, words, chars = count_file("test.txt")
print(f"行数:{lines},单词数:{words},字符数:{chars}")
本章小结
- open()函数:打开文件,指定模式和编码
- with语句:自动关闭文件,推荐使用
- 读取方法:read、readline、readlines
- 写入方法:write、writelines
- 路径处理:os.path 或 pathlib
- 常用格式:JSON、CSV
下一步
学会了文件操作,下一章学习模块与包管理。
→ 继续阅读:24-模块与包管理