# AI NLP Learning: Embedding / TensorBoard

张开发
2026/4/9 5:04:20 · 15 min read


The script below builds a tiny corpus, segments it with a custom jieba tokenizer so that person names stay intact as single tokens, looks up vectors from a freshly initialized `nn.Embedding` (no training happens here), and writes them to TensorBoard's Projector:

```python
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import jieba
import os
import shutil

# Load a custom word list so person names are segmented as whole tokens
jieba_tokenizer = jieba.Tokenizer()
custom_words = ["龙文浩", "张三", "李四", "王五", "工程师", "学生", "教师", "医生"]
for word in custom_words:
    jieba_tokenizer.add_word(word)

# Build the corpus and the vocabulary
def build_vocab():
    texts = ["龙文浩是工程师", "张三是学生", "李四是教师", "王五是医生"]
    words = []
    for text in texts:
        words += jieba_tokenizer.lcut(text)
    vocab = sorted(set(words))
    word2idx = {w: i for i, w in enumerate(vocab)}
    print("Vocabulary:", word2idx)
    return vocab, word2idx

# A minimal Embedding model
class EmbeddingModel(nn.Module):
    def __init__(self, vocab_size, embed_dim=10):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)

    def forward(self, x):
        return self.embedding(x)

# Main: look up the (randomly initialized) embeddings and visualize them in TensorBoard
if __name__ == "__main__":
    vocab, word2idx = build_vocab()
    vocab_size = len(vocab)
    model = EmbeddingModel(vocab_size, embed_dim=10)

    word_indices = torch.tensor(list(word2idx.values()), dtype=torch.long)
    with torch.no_grad():
        embeddings = model.embedding(word_indices)

    # Use an absolute path for the log directory to avoid relative-path ambiguity
    log_dir = os.path.abspath(r"runs\embedding_visual")

    # Force-delete old logs to avoid stale caches / encoding issues
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    os.makedirs(log_dir, exist_ok=True)

    # Write to TensorBoard with a flat directory layout (no nesting)
    writer = SummaryWriter(log_dir)
    # Omit the tag and global_step arguments to avoid nested subdirectories
    writer.add_embedding(
        mat=embeddings,
        metadata=vocab,
    )
    writer.close()

    print("\n✅ TensorBoard files saved to:", log_dir)
    print("Launch command (copy and run as-is; works in both PowerShell and CMD):")
    print(f"tensorboard --logdir={log_dir} --host=127.0.0.1 --port=6006")
    print("Open http://127.0.0.1:6006 in your browser and click the PROJECTOR tab to view the word-vector visualization")
```
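Because the `nn.Embedding` above is used straight out of its random initialization, the points you see in the Projector carry no semantics yet. Below is a minimal sketch, not from the original article, of one way to train the table first: a hypothetical classification objective where each word index predicts a toy label. The `vocab_size` of 9 assumes the article's corpus segments into 9 unique tokens (4 names, 4 occupations, plus "是"); the labels and hyperparameters are invented for illustration.

```python
import torch
import torch.nn as nn

torch.manual_seed(0)

# Hypothetical setup: sizes mirror the article; the labels are made up,
# e.g. class 0 = person name, class 1 = everything else.
vocab_size, embed_dim, num_labels = 9, 10, 2
embedding = nn.Embedding(vocab_size, embed_dim)
classifier = nn.Linear(embed_dim, num_labels)
optimizer = torch.optim.Adam(
    list(embedding.parameters()) + list(classifier.parameters()), lr=0.1
)
loss_fn = nn.CrossEntropyLoss()

word_indices = torch.arange(vocab_size)                   # one sample per word
toy_labels = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1, 1])    # made-up classes

for step in range(200):
    optimizer.zero_grad()
    logits = classifier(embedding(word_indices))  # shape (9, 2) class scores
    loss = loss_fn(logits, toy_labels)
    loss.backward()
    optimizer.step()

print("final loss:", loss.item())

# embedding.weight now reflects the toy objective; it can be handed to
# writer.add_embedding(mat=embedding.weight.detach(), metadata=vocab)
# exactly as in the script above.
```

After a pass like this, words that share a toy label should cluster together in the PROJECTOR view, which makes the visualization easier to interpret than random vectors.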

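One portability note: the log path above uses a Windows-style raw string, consistent with the PowerShell/CMD launch instructions printed by the script. If the same code needs to run on Linux or macOS, a `pathlib`-based variant (an assumption, not part of the original article) produces the same `runs/embedding_visual` layout with OS-appropriate separators:

```python
from pathlib import Path
import shutil

# Build runs/embedding_visual with separators appropriate to the OS
log_dir = (Path("runs") / "embedding_visual").resolve()
shutil.rmtree(log_dir, ignore_errors=True)  # clear stale logs; no-op if absent
log_dir.mkdir(parents=True, exist_ok=True)

# SummaryWriter accepts a plain string path:
# writer = SummaryWriter(str(log_dir))
print(log_dir)
```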