人工智能与机器学习
从入门到实践的AI学习路径
AI技术栈
基础库
├── NumPy (数值计算)
├── Pandas (数据处理)
└── Scikit-learn (传统ML)
深度学习框架
├── PyTorch (研究首选)
├── TensorFlow/Keras (工业应用)
└── JAX (高性能)
应用领域
├── 计算机视觉 (CV)
├── 自然语言处理 (NLP)
├── 推荐系统
└── 强化学习
机器学习基础
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
# Load the dataset and separate the feature columns from the label column.
df = pd.read_csv('data.csv')
X = df.drop(columns='target')
y = df['target']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information leaks into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic-regression classifier on the standardized features.
model = LogisticRegression().fit(X_train_scaled, y_train)

# Predict the held-out rows, then print the accuracy and the per-class report.
y_pred = model.predict(X_test_scaled)
print(f"准确率: {accuracy_score(y_test, y_pred):.4f}")
print(classification_report(y_test, y_pred))
常用算法示例
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
# Decision tree, depth capped at 5, trained on the unscaled features.
# Seeded like the random forest below so repeated runs give the same tree.
dt = DecisionTreeClassifier(max_depth=5, random_state=42)
dt.fit(X_train, y_train)

# Random forest of 100 trees, trained on the unscaled features.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Support vector machine with an RBF kernel, trained on the standardized features.
svm = SVC(kernel='rbf', C=1.0)
svm.fit(X_train_scaled, y_train)

# K-nearest neighbours (k=5), trained on the standardized features.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

# Model comparison: display name -> fitted estimator.
models = {
    'Decision Tree': dt,
    'Random Forest': rf,
    'SVM': svm,
    'KNN': knn,
}

# Estimators fitted on standardized features must be scored on the
# standardized test features; the others are scored on the raw test features.
scaled_models = {'SVM', 'KNN'}

# The loop variable is deliberately not called `model`, so the classifier
# already bound to `model` earlier in the file is not overwritten.
for name, clf in models.items():
    test_features = X_test_scaled if name in scaled_models else X_test
    score = clf.score(test_features, y_test)
    print(f"{name}: {score:.4f}")
深度学习:PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# 定义神经网络
class NeuralNetwork(nn.Module):
    """Fully connected classifier with two hidden layers.

    Data flow: ``layer1 -> ReLU -> dropout -> layer2 -> ReLU -> layer3``.
    The output of ``layer3`` is returned as raw scores; no softmax is applied.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output scores per sample.
        dropout_rate: Probability passed to ``nn.Dropout``. Defaults to 0.2,
            the value that was previously hard-coded.
    """

    def __init__(self, input_size, hidden_size, num_classes, dropout_rate=0.2):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return the ``num_classes`` raw scores for each row of ``x``."""
        # Dropout is applied only after the first hidden activation.
        x = self.dropout(self.relu(self.layer1(x)))
        x = self.relu(self.layer2(x))
        return self.layer3(x)
# Prepare the data: standardized training features as float32 and labels as
# int64 class indices, which is what nn.CrossEntropyLoss expects for targets.
X_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_train.values, dtype=torch.long)
dataset = TensorDataset(X_tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Size the output layer from the labels instead of hard-coding two classes,
# so every label index is a valid target for the loss.
# NOTE(review): assumes labels are already encoded as integers 0..C-1 — confirm.
num_classes = int(y_tensor.max()) + 1

# Initialize the model, the loss and the optimizer.
model = NeuralNetwork(
    input_size=X_train.shape[1],
    hidden_size=64,
    num_classes=num_classes,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop.
epochs = 100
for epoch in range(epochs):
    model.train()  # enable dropout
    total_loss = 0.0
    for batch_X, batch_y in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean per-batch loss every 20 epochs.
    if (epoch + 1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(dataloader):.4f}')

# Evaluation: disable dropout and gradient tracking, then compare the
# highest-scoring class per row with the true test labels.
model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
    outputs = model(X_test_tensor)
    predicted = outputs.argmax(dim=1)
    accuracy = (predicted.numpy() == y_test.values).mean()
    print(f'测试准确率: {accuracy:.4f}')
图像分类:CNN
import torch.nn as nn
class CNN(nn.Module):
    """Small convolutional image classifier.

    Three ``Conv2d -> ReLU -> MaxPool2d`` stages (32, 64 and 128 channels)
    feed a two-layer fully connected head.

    Each pooling stage halves the spatial size, so a 32x32 input reaches the
    head as a 128x4x4 feature map. An adaptive average pool placed before the
    head always produces a 4x4 map, so other input sizes work as well. For
    32x32 inputs the pool is an identity, and it adds no parameters.

    Args:
        num_classes: Number of output scores per image.
        in_channels: Number of channels in the input images. Defaults to 3,
            the value that was previously hard-coded.
    """

    def __init__(self, num_classes=10, in_channels=3):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Kept outside `features` and `classifier` so the indices of their
        # existing layers (and therefore the state_dict keys) do not shift.
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return ``num_classes`` raw scores for each image in the batch ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        return self.classifier(x)
自然语言处理
from transformers import pipeline
# The demos below all use pretrained models obtained through `pipeline`.

# Sentiment analysis.
# NOTE(review): no model is named, so the library selects its own default
# checkpoint — confirm that default is suitable for Chinese input.
classifier = pipeline(task="sentiment-analysis")
result = classifier("这个产品非常好用,我很喜欢!")
print(result)

# Text generation with the "gpt2" checkpoint.
generator = pipeline(task="text-generation", model="gpt2")
text = generator("Once upon a time", max_length=50)
print(text)

# Question answering: the answer is looked up in the supplied context passage.
# NOTE(review): no model is named here either — confirm the default
# checkpoint is suitable for a Chinese question and context.
qa = pipeline(task="question-answering")
context = "Python是一种广泛使用的编程语言,由Guido van Rossum创建。"
question = "谁创建了Python?"
answer = qa(context=context, question=question)
print(answer)
学习路线图
第1阶段:数学基础 (1-2个月)
├── 线性代数
├── 概率统计
├── 微积分基础
└── 优化理论
第2阶段:机器学习 (2-3个月)
├── 监督学习算法
├── 无监督学习
├── 模型评估
└── 特征工程
第3阶段:深度学习 (3-4个月)
├── 神经网络基础
├── CNN图像处理
├── RNN/LSTM序列
└── Transformer
第4阶段:专业方向 (持续)
├── 计算机视觉
├── 自然语言处理
├── 推荐系统
└── 强化学习
本章小结
- ML基础:Scikit-learn快速上手
- 深度学习:PyTorch/TensorFlow
- 应用领域:CV、NLP、推荐系统
- 持续学习:跟进最新论文和技术
→ 继续阅读:40-游戏开发入门