当前位置：首页 > news >正文

从零实现基于Transformer的英译汉任务

news 来源：原创 2025/8/20 17:31:16

1. model.py（用的是上一篇文章的代码：从0搭建Transformer-CSDN博客）

import torch
import torch.nn as nn
import math


class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a batch of embeddings.

    The table is stored as the non-trainable buffer ``pe`` with shape
    ``(1, max_len, d_model)``, so it is saved in the state dict and moves
    with the module across devices, but is never updated by the optimizer.

    Args:
        d_model: Embedding width. Odd values are supported.
        dropout: Dropout probability applied to the summed output.
        max_len: Largest sequence length the table covers.
    """

    def __init__(self, d_model, dropout, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer cosine column than sine
        # column, so trim the angles to the number of odd-indexed slots.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``dropout(x + pe)`` for ``x`` of shape (batch, seq, d_model)."""
        # The buffer carries no gradient, so plain slicing is enough.
        x = x + self.pe[:, :x.size(1)]
        # The dropout layer built in __init__ was previously never applied.
        # It is a no-op in eval mode, so inference results are unchanged.
        return self.dropout(x)


class MultiHeadAttention(nn.Module):
    """Scaled dot-product attention split across ``num_heads`` heads.

    Args:
        d_model: Model width; must be divisible by ``num_heads``.
        num_heads: Number of attention heads.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        assert d_model % num_heads == 0
        self.d_k = d_model // num_heads
        self.num_heads = num_heads
        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)

    def _split_heads(self, x, batch_size):
        """Reshape (batch, seq, d_model) into (batch, heads, seq, d_k)."""
        return x.view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)

    def forward(self, query, key, value, mask=None):
        """Attend from ``query`` to ``key``/``value``.

        Args:
            query, key, value: Tensors of shape (batch, seq, d_model).
            mask: Optional tensor broadcastable to (batch, heads, q_len,
                k_len); positions equal to 0 are excluded from attention.

        Returns:
            Tensor of shape (batch, q_len, d_model).
        """
        batch_size = query.size(0)
        Q = self._split_heads(self.W_q(query), batch_size)
        K = self._split_heads(self.W_k(key), batch_size)
        V = self._split_heads(self.W_v(value), batch_size)

        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
        if mask is not None:
            # Use the dtype's own minimum instead of a literal -1e9, which
            # cannot be represented in float16 and makes masked_fill raise.
            scores = scores.masked_fill(mask == 0, torch.finfo(scores.dtype).min)
        attn_weights = torch.softmax(scores, dim=-1)

        context = torch.matmul(attn_weights, V)
        context = context.transpose(1, 2).contiguous().view(
            batch_size, -1, self.d_k * self.num_heads
        )
        return self.W_o(context)


class EncoderLayer(nn.Module):
    """Self-attention followed by a feed-forward net, each with post-norm residuals."""

    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Linear(d_ff, d_model),
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        """Run one encoder layer over ``x``; ``mask`` is passed to self-attention."""
        attn_output = self.attn(x, x, x, mask)
        x = self.norm1(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.norm2(x + self.dropout(ff_output))
        return x


class DecoderLayer(nn.Module):
    """Masked self-attention, cross-attention over the encoder output, then feed-forward."""

    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.cross_attn = MultiHeadAttention(d_model, num_heads)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Linear(d_ff, d_model),
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_output, src_mask, tgt_mask):
        """Run one decoder layer.

        ``tgt_mask`` restricts self-attention over ``x``; ``src_mask``
        restricts cross-attention over ``enc_output``.
        """
        attn_output = self.self_attn(x, x, x, tgt_mask)
        x = self.norm1(x + self.dropout(attn_output))
        attn_output = self.cross_attn(x, enc_output, enc_output, src_mask)
        x = self.norm2(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.norm3(x + self.dropout(ff_output))
        return x


class Transformer(nn.Module):
    """Encoder-decoder Transformer producing target-vocabulary logits.

    Args:
        src_vocab_size: Number of source token ids.
        tgt_vocab_size: Number of target token ids (width of the output logits).
        d_model: Embedding / hidden width.
        num_heads: Attention heads per layer.
        num_layers: Number of encoder layers and of decoder layers.
        d_ff: Hidden width of each feed-forward block.
        dropout: Dropout probability used throughout.
    """

    def __init__(self, src_vocab_size, tgt_vocab_size, d_model=512, num_heads=8,
                 num_layers=6, d_ff=2048, dropout=0.1):
        super().__init__()
        self.encoder_embed = nn.Embedding(src_vocab_size, d_model)
        self.decoder_embed = nn.Embedding(tgt_vocab_size, d_model)
        # One positional encoder is shared by the encoder and decoder inputs.
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        self.encoder_layers = nn.ModuleList(
            [EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)]
        )
        self.decoder_layers = nn.ModuleList(
            [DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)]
        )
        self.fc_out = nn.Linear(d_model, tgt_vocab_size)

    def encode(self, src, src_mask):
        """Embed source ids (batch, src_len) and run the encoder stack."""
        src = self.pos_encoder(self.encoder_embed(src))
        for layer in self.encoder_layers:
            src = layer(src, src_mask)
        return src

    def decode(self, tgt, enc_output, src_mask, tgt_mask):
        """Embed target ids (batch, tgt_len) and run the decoder stack.

        Returns hidden states; apply ``fc_out`` to obtain logits.
        """
        tgt = self.pos_encoder(self.decoder_embed(tgt))
        for layer in self.decoder_layers:
            tgt = layer(tgt, enc_output, src_mask, tgt_mask)
        return tgt

    def forward(self, src, tgt, src_mask, tgt_mask):
        """Return logits of shape (batch, tgt_len, tgt_vocab_size)."""
        enc_output = self.encode(src, src_mask)
        dec_output = self.decode(tgt, enc_output, src_mask, tgt_mask)
        logits = self.fc_out(dec_output)
        return logits

2. train.py（数据量很大，使用其中一部分进行训练和验证，数据集来源：中英互译数据集(translation2019zh)_数据集-飞桨AI Studio星河社区）

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from model import Transformer, PositionalEncoding
import math
import numpy as np
import os
import json
from tqdm import tqdm# --- Data Loading for JSON Lines format ---
# MODIFIED: Added max_lines parameter
def load_data_from_jsonl(file_path, max_lines=None):
    """Load parallel English/Chinese sentences from a JSON Lines file.

    Each line is expected to be a JSON object with string values under the
    keys ``'english'`` and ``'chinese'``. Lines that are not valid JSON, are
    not JSON objects, or lack either string field are skipped and do not
    count towards ``max_lines``.

    Args:
        file_path: Path of the ``.json`` / ``.jsonl`` file to read (UTF-8).
        max_lines: Maximum number of sentence pairs to keep; ``None`` reads
            the whole file.

    Returns:
        A ``(en_sentences, zh_sentences)`` tuple of equally long lists.
        Both lists are empty when the file is missing or cannot be read.
    """
    en_sentences, zh_sentences = [], []
    print(f"Loading data from {file_path}..." + (f" (up to {max_lines} lines)" if max_lines else ""))
    if not os.path.exists(file_path):
        print(f"Error: Data file not found at {file_path}")
        return [], []

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            progress = tqdm(f, desc=f"Reading {os.path.basename(file_path)}", total=max_lines or None)
            for line in progress:
                if max_lines is not None and len(en_sentences) >= max_lines:
                    print(f"\nReached max_lines limit of {max_lines} for {file_path}.")
                    break
                try:
                    data = json.loads(line.strip())
                except ValueError:
                    # json.JSONDecodeError is a ValueError; blank or malformed
                    # lines are skipped silently to keep the output quiet.
                    continue
                # A bare number/string/list is valid JSON but not a record.
                # Previously `'english' in data` raised TypeError on it and
                # the outer handler discarded everything loaded so far.
                if not isinstance(data, dict):
                    continue
                english = data.get('english')
                chinese = data.get('chinese')
                # Keep only string pairs: the dataset iterates over the
                # characters of each sentence and would fail on e.g. null.
                if isinstance(english, str) and isinstance(chinese, str):
                    en_sentences.append(english)
                    zh_sentences.append(chinese)
    except (OSError, UnicodeError) as e:
        print(f"An error occurred while reading {file_path}: {e}")
        return [], []

    print(f"Loaded {len(en_sentences)} sentence pairs from {file_path}.")
    return en_sentences, zh_sentences


# ... (Vocab, TranslationDataset, collate_fn, create_masks classes/functions remain the same) ...
# --- Vocab Class (Consider Subword Tokenization for large datasets later) ---
class Vocab:
    """Character-level vocabulary mapping tokens to integer ids and back.

    Special tokens receive the lowest ids in the order given (``'<pad>'`` is
    0 with the defaults). Every other character seen in ``sentences`` at
    least ``min_freq`` times follows, most frequent first; ties keep the
    order in which the characters were first encountered.

    Attributes:
        stoi: dict mapping token -> id.
        itos: dict mapping id -> token.
        special_tokens: list of the special tokens, without duplicates.

    Args:
        sentences: Iterable of sentences; non-string items are ignored.
        min_freq: Minimum number of occurrences for a character to be kept.
        special_tokens: Optional list of special tokens. Defaults to
            ``['<pad>', '<unk>', '<sos>', '<eos>']``.
    """

    def __init__(self, sentences, min_freq=1, special_tokens=None):
        from collections import Counter  # local import keeps this block self-contained

        if special_tokens is None:
            # PAD comes first because index 0 is assumed for padding elsewhere.
            special_tokens = ['<pad>', '<unk>', '<sos>', '<eos>']
        # Duplicate entries were the only way stoi and itos could disagree
        # about a special token (the case the old "<unk> repair" code tried
        # to patch up afterwards); removing them makes the two mappings
        # consistent by construction.
        self.special_tokens = list(dict.fromkeys(special_tokens))
        self.stoi = {token: idx for idx, token in enumerate(self.special_tokens)}
        self.itos = {idx: token for token, idx in self.stoi.items()}

        counter = Counter()
        print("Counting character frequencies for vocab...")
        for s in tqdm(sentences, desc="Processing sentences for vocab"):
            if isinstance(s, str):
                counter.update(s)

        # most_common() sorts by descending count and is stable for ties.
        ranked = [(token, count) for token, count in counter.most_common()
                  if token not in self.stoi]
        next_idx = len(self.stoi)
        for token, count in tqdm(ranked, desc="Building vocab mapping"):
            if count < min_freq:
                continue
            self.stoi[token] = next_idx
            self.itos[next_idx] = token
            next_idx += 1

    def __len__(self):
        # itos has exactly one entry per assigned id.
        return len(self.itos)


# --- TranslationDataset Class (No changes needed) ---
class TranslationDataset(Dataset):
    """Parallel corpus stored as pre-built id tensors.

    Every sentence is split into characters, mapped through the matching
    vocabulary (unknown characters become ``'<unk>'``) and wrapped in
    ``'<sos>'`` / ``'<eos>'``. Item ``i`` is the pair
    ``(src_data[i], tgt_data[i])`` of 1-D ``LongTensor``s.
    """

    def __init__(self, en_sentences, zh_sentences, src_vocab, tgt_vocab):
        self.src_data = []
        self.tgt_data = []
        print("Creating dataset tensors...")

        def special_ids(vocab):
            # Resolve the marker ids once instead of once per sentence.
            table = vocab.stoi
            return table['<sos>'], table['<eos>'], table['<unk>']

        def to_ids(text, table, sos, eos, unk):
            return [sos] + [table.get(ch, unk) for ch in text] + [eos]

        src_marks = special_ids(src_vocab)
        tgt_marks = special_ids(tgt_vocab)

        pairs = tqdm(zip(en_sentences, zh_sentences), total=len(en_sentences), desc="Vectorizing data")
        for english, chinese in pairs:
            src_ids = to_ids(english, src_vocab.stoi, *src_marks)
            tgt_ids = to_ids(chinese, tgt_vocab.stoi, *tgt_marks)
            # Length filtering could be added here if not done in preprocessing.
            self.src_data.append(torch.LongTensor(src_ids))
            self.tgt_data.append(torch.LongTensor(tgt_ids))
        print("Dataset tensors created.")

    def __len__(self):
        return len(self.src_data)

    def __getitem__(self, idx):
        return self.src_data[idx], self.tgt_data[idx]


# --- Collate Function (Ensure PAD index is correct) ---
def collate_fn(batch, pad_idx=0):
    """Pad a list of (src, tgt) tensor pairs into two batch-first tensors.

    Args:
        batch: Sequence of ``(src_tensor, tgt_tensor)`` pairs of 1-D tensors.
        pad_idx: Value used to fill the shorter sequences.

    Returns:
        ``(src_padded, tgt_padded)``, each of shape (batch, longest_seq).
    """
    sources, targets = zip(*batch)
    pad = nn.utils.rnn.pad_sequence
    padded_sources = pad(sources, batch_first=True, padding_value=pad_idx)
    padded_targets = pad(targets, batch_first=True, padding_value=pad_idx)
    return padded_sources, padded_targets


# --- Mask Creation Function (Adjust for batch_first=True) ---
def create_masks(src, tgt, pad_idx):
    """Build boolean attention masks for batch-first id tensors.

    Args:
        src: Source ids, shape (batch, src_len).
        tgt: Target ids, shape (batch, tgt_len).
        pad_idx: Id that marks padding.

    Returns:
        ``(src_mask, tgt_mask)`` where ``src_mask`` has shape
        (batch, 1, 1, src_len) and is True at non-pad source positions, and
        ``tgt_mask`` has shape (batch, 1, tgt_len, tgt_len) and is True where
        the row position is not padding and the column is not in the future.
    """
    device = src.device

    # (batch, src_len) -> (batch, 1, 1, src_len)
    src_mask = (src != pad_idx).unsqueeze(1).unsqueeze(2)

    # (batch, tgt_len) -> (batch, 1, tgt_len, 1): one flag per target row.
    row_is_token = (tgt != pad_idx).unsqueeze(1).unsqueeze(-1)

    # Lower triangle (diagonal included): position i may look at j <= i.
    steps = tgt.size(1)
    causal = torch.tril(torch.ones((steps, steps), device=device)).bool()
    causal = causal.unsqueeze(0).unsqueeze(0)  # add batch and head dims

    tgt_mask = row_is_token & causal
    return src_mask.to(device), tgt_mask.to(device)


# --- Main Execution Block ---
if __name__ == '__main__':
    # Training script: load a subset of the corpus, build character vocabs,
    # train the Transformer and checkpoint it to MODEL_SAVE_PATH.

    # --- Configuration ---
    TRAIN_DATA_PATH = 'data/translation2019zh_train.json'
    VALID_DATA_PATH = 'data/translation2019zh_valid.json'
    MODEL_SAVE_PATH = 'best_model_subset.pth'  # New model name for subset

    # How many sentence pairs to use: 1,000,000 for training and 100,000 for
    # validation. Adjust based on your resources and desired training speed.
    MAX_TRAIN_LINES = 1000000
    MAX_VALID_LINES = 100000

    # Hyperparameters (a smaller model suits a smaller data subset)
    BATCH_SIZE = 32
    NUM_EPOCHS = 10
    LEARNING_RATE = 1e-4
    D_MODEL = 256
    NUM_HEADS = 8      # Must be a divisor of D_MODEL
    NUM_LAYERS = 3
    D_FF = 1024        # Usually 4 * D_MODEL
    DROPOUT = 0.1
    MIN_FREQ = 1       # For smaller datasets, min_freq=1 might be okay
    PRINT_FREQ = 100   # Currently unused; tqdm reports the running loss
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {DEVICE}")

    # --- Load Data (using the max_lines parameter) ---
    print(f"Loading subset of training data (up to {MAX_TRAIN_LINES} lines)...")
    train_en_sentences, train_zh_sentences = load_data_from_jsonl(TRAIN_DATA_PATH, max_lines=MAX_TRAIN_LINES)
    if not train_en_sentences:
        print("No training data loaded. Exiting.")
        # Exit with a non-zero status so callers can detect the failure;
        # the bare exit() used before reported success (status 0).
        raise SystemExit(1)

    print(f"Loading subset of validation data (up to {MAX_VALID_LINES} lines)...")
    val_en_sentences, val_zh_sentences = load_data_from_jsonl(VALID_DATA_PATH, max_lines=MAX_VALID_LINES)
    if not val_en_sentences:
        print("Warning: No validation data loaded. Proceeding without validation.")

    # --- Build Vocabularies (ONLY from the training data subset) ---
    print("Building vocabularies from training data subset...")
    src_vocab = Vocab(train_en_sentences, min_freq=MIN_FREQ)
    tgt_vocab = Vocab(train_zh_sentences, min_freq=MIN_FREQ)
    print(f"Source vocab size: {len(src_vocab)}")
    print(f"Target vocab size: {len(tgt_vocab)}")
    PAD_IDX = src_vocab.stoi['<pad>']
    if PAD_IDX != 0 or tgt_vocab.stoi['<pad>'] != 0:
        print("Error: PAD index is not 0. Collate function and loss needs adjustment.")
        raise SystemExit(1)

    # --- Create Datasets ---
    print("Creating training dataset...")
    train_dataset = TranslationDataset(train_en_sentences, train_zh_sentences, src_vocab, tgt_vocab)
    if val_en_sentences:
        print("Creating validation dataset...")
        val_dataset = TranslationDataset(val_en_sentences, val_zh_sentences, src_vocab, tgt_vocab)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, collate_fn=lambda b: collate_fn(b, PAD_IDX))
        print(f"Train size: {len(train_dataset)}, Validation size: {len(val_dataset)}")
    else:
        val_loader = None
        print(f"Train size: {len(train_dataset)} (No validation set)")
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                              collate_fn=lambda b: collate_fn(b, PAD_IDX))

    # --- Initialize Model ---
    print("Initializing model...")
    model = Transformer(
        src_vocab_size=len(src_vocab),
        tgt_vocab_size=len(tgt_vocab),
        d_model=D_MODEL,
        num_heads=NUM_HEADS,
        num_layers=NUM_LAYERS,
        d_ff=D_FF,
        dropout=DROPOUT,
    ).to(DEVICE)

    def count_parameters(model):
        """Return the number of trainable parameters in ``model``."""
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f'The model has {count_parameters(model):,} trainable parameters')
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, betas=(0.9, 0.98), eps=1e-9)
    criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

    def batch_loss(src, tgt):
        """Run one teacher-forced forward pass and return the batch loss.

        The decoder input is the target without its last token and the
        label is the target shifted left by one position.
        """
        src = src.to(DEVICE)
        tgt = tgt.to(DEVICE)
        tgt_input = tgt[:, :-1]
        tgt_output = tgt[:, 1:]
        src_mask, tgt_mask = create_masks(src, tgt_input, PAD_IDX)
        logits = model(src, tgt_input, src_mask, tgt_mask)
        return criterion(logits.reshape(-1, logits.shape[-1]), tgt_output.reshape(-1))

    def save_checkpoint(epoch, loss):
        """Write model, optimizer, vocabularies and config to MODEL_SAVE_PATH.

        The vocabularies are pickled as ``Vocab`` objects, so whoever loads
        the file must be able to import that class.
        """
        torch.save({
            'model_state_dict': model.state_dict(),
            'src_vocab': src_vocab,
            'tgt_vocab': tgt_vocab,
            'epoch': epoch,
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            'config': {
                'd_model': D_MODEL, 'num_heads': NUM_HEADS, 'num_layers': NUM_LAYERS,
                'd_ff': D_FF, 'dropout': DROPOUT,
                'src_vocab_size': len(src_vocab), 'tgt_vocab_size': len(tgt_vocab),
                'max_train_lines': MAX_TRAIN_LINES, 'max_valid_lines': MAX_VALID_LINES,
            },
        }, MODEL_SAVE_PATH)

    # --- Training Loop ---
    best_val_loss = float('inf')
    print("Starting training on data subset...")
    for epoch in range(NUM_EPOCHS):
        model.train()
        epoch_loss = 0
        train_iterator = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} Training")
        for src, tgt in train_iterator:
            loss = batch_loss(src, tgt)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            epoch_loss += loss.item()
            train_iterator.set_postfix(loss=loss.item())
        avg_train_loss = epoch_loss / len(train_loader)

        if val_loader:
            model.eval()
            val_loss = 0
            val_iterator = tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} Validation")
            with torch.no_grad():
                for src, tgt in val_iterator:
                    loss = batch_loss(src, tgt)
                    val_loss += loss.item()
                    val_iterator.set_postfix(loss=loss.item())
            avg_val_loss = val_loss / len(val_loader)
            print(f'\nEpoch {epoch+1} Summary: Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')
            # Only keep the checkpoint with the best validation loss so far.
            if avg_val_loss < best_val_loss:
                print(f"Validation loss decreased ({best_val_loss:.4f} --> {avg_val_loss:.4f}). Saving model to {MODEL_SAVE_PATH}...")
                best_val_loss = avg_val_loss
                save_checkpoint(epoch, best_val_loss)
        else:
            # Without validation data, overwrite the checkpoint every epoch.
            print(f'\nEpoch {epoch+1} Summary: Train Loss: {avg_train_loss:.4f}')
            print(f"Saving model checkpoint to {MODEL_SAVE_PATH}...")
            save_checkpoint(epoch, avg_train_loss)

    print("Training complete on data subset!")

3. predict.py（模型预测）

# predict.py
import torch
import torch.nn as nn
import numpy as np
import sys
import os
import json # Keep json import just in case, though not used directly here# --- Attempt to import necessary components ---
try:
    from model import Transformer, PositionalEncoding
    # Vocab must be importable here: the checkpoint stores the vocabularies
    # as pickled Vocab objects. create_masks is imported for convenience;
    # translate() builds its own single-sentence masks.
    from train import Vocab, create_masks
except ImportError as e:
    print(f"Error importing necessary modules: {e}")
    print("Please ensure model.py and train.py are in the Python path and have the necessary definitions.")
    sys.exit(1)

# --- Configuration ---
# !!! IMPORTANT: Use the path to the model saved by the *new* training script !!!
CHECKPOINT_PATH = 'best_model_subset.pth'
MAX_LENGTH = 60    # Maximum length of generated translation
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {DEVICE}")
print(f"Loading checkpoint from: {CHECKPOINT_PATH}")

# --- Load Checkpoint and Vocab ---
if not os.path.exists(CHECKPOINT_PATH):
    print(f"Error: Checkpoint file not found at {CHECKPOINT_PATH}")
    sys.exit(1)

try:
    # SECURITY NOTE: the checkpoint contains pickled Python objects (the
    # Vocab instances), so it has to be loaded with full unpickling. Only
    # load checkpoint files you created yourself or otherwise trust.
    try:
        # PyTorch >= 2.6 defaults to weights_only=True, which refuses to
        # unpickle custom classes such as Vocab.
        checkpoint = torch.load(CHECKPOINT_PATH, map_location=DEVICE, weights_only=False)
    except TypeError:
        # Older PyTorch releases do not know the weights_only argument.
        checkpoint = torch.load(CHECKPOINT_PATH, map_location=DEVICE)
    print("Checkpoint loaded successfully.")
except Exception as e:
    print(f"Error loading checkpoint file: {e}")
    sys.exit(1)

# --- Validate Checkpoint Contents ---
required_keys = ['model_state_dict', 'src_vocab', 'tgt_vocab']
# 'config' is optional; when it is absent the hyperparameters fall back to
# the manually defined values further down.
if 'config' in checkpoint:
    required_keys.append('config')
for key in required_keys:
    if key not in checkpoint:
        print(f"Error: Required key '{key}' not found in the checkpoint.")
        sys.exit(1)

# --- Extract Vocab and Model Config ---
try:
    src_vocab = checkpoint['src_vocab']
    tgt_vocab = checkpoint['tgt_vocab']
    # Explicit check instead of `assert`: asserts are removed under
    # `python -O` and produce an empty error message when they fail.
    if not (isinstance(src_vocab, Vocab) and isinstance(tgt_vocab, Vocab)):
        raise TypeError(
            "checkpoint vocabularies must be Vocab instances, got "
            f"{type(src_vocab).__name__} and {type(tgt_vocab).__name__}"
        )
    PAD_IDX = src_vocab.stoi.get('<pad>', 0)  # Use src_vocab pad index

    # Get model hyperparameters from checkpoint if saved
    if 'config' in checkpoint:
        config = checkpoint['config']
        D_MODEL = config['d_model']
        NUM_HEADS = config['num_heads']
        NUM_LAYERS = config['num_layers']
        D_FF = config['d_ff']
        DROPOUT = config['dropout']
        SRC_VOCAB_SIZE = config['src_vocab_size']
        TGT_VOCAB_SIZE = config['tgt_vocab_size']
        print("Model configuration loaded from checkpoint.")
        # Verify vocab sizes match loaded vocabs
        if SRC_VOCAB_SIZE != len(src_vocab) or TGT_VOCAB_SIZE != len(tgt_vocab):
            print("Warning: Vocab size in config mismatches loaded vocab length!")
            print(f"Config Src:{SRC_VOCAB_SIZE}/Tgt:{TGT_VOCAB_SIZE}, Loaded Src:{len(src_vocab)}/Tgt:{len(tgt_vocab)}")
            # Use lengths from loaded vocabs as they are definitive
            SRC_VOCAB_SIZE = len(src_vocab)
            TGT_VOCAB_SIZE = len(tgt_vocab)
    else:
        # !!! Fallback: Manually define parameters - MUST MATCH TRAINING !!!
        print("Warning: Model config not found in checkpoint. Using manually defined parameters.")
        print("Ensure these match the parameters used during training!")
        D_MODEL = 512
        NUM_HEADS = 8
        NUM_LAYERS = 6
        D_FF = 2048
        DROPOUT = 0.1
        SRC_VOCAB_SIZE = len(src_vocab)  # Use length from loaded vocab
        TGT_VOCAB_SIZE = len(tgt_vocab)  # Use length from loaded vocab
    print(f"Source vocab size: {len(src_vocab)}")
    print(f"Target vocab size: {len(tgt_vocab)}")
except Exception as e:
    print(f"Error processing vocabulary or config from checkpoint: {e}")
    sys.exit(1)

# --- Initialize Model ---
try:
    model = Transformer(
        src_vocab_size=SRC_VOCAB_SIZE,
        tgt_vocab_size=TGT_VOCAB_SIZE,
        d_model=D_MODEL,
        num_heads=NUM_HEADS,
        num_layers=NUM_LAYERS,
        d_ff=D_FF,
        dropout=DROPOUT,  # Dropout value is less critical for eval mode
    ).to(DEVICE)
    print("Model initialized.")

    def count_parameters(model):
        """Return the total number of parameters in ``model``."""
        return sum(p.numel() for p in model.parameters())

    print(f'The model has {count_parameters(model):,} total parameters.')
except Exception as e:
    print(f"Error initializing the Transformer model: {e}")
    sys.exit(1)

# --- Load Model State ---
try:
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # Set model to evaluation mode
    print("Model state loaded successfully.")
except RuntimeError as e:
    print(f"Error loading model state_dict: {e}")
    print("This *strongly* indicates a mismatch between the loaded checkpoint's architecture")
    print("(implicit in state_dict keys/shapes) and the model initialized here.")
    print("Verify that the hyperparameters (D_MODEL, NUM_HEADS, NUM_LAYERS, D_FF, vocab sizes)")
    print("match *exactly* those used when the checkpoint was saved.")
    sys.exit(1)
except Exception as e:
    print(f"An unexpected error occurred while loading model state: {e}")
    sys.exit(1)

# --- Translate Function (largely unchanged, ensure correct mask creation for batch size 1) ---
def translate(sentence: str, model: nn.Module, src_vocab: Vocab, tgt_vocab: Vocab, device: torch.device, max_length: int = 50):
    """Translate one source sentence with greedy decoding.

    The sentence is split into characters, encoded once, and the target is
    then grown one token at a time by taking the highest-scoring token at
    each step until ``'<eos>'`` is produced or ``max_length`` steps have run.

    Returns:
        The translated string with ``'<sos>'``, ``'<eos>'`` and ``'<pad>'``
        removed, or a bracketed ``"[Error: ...]"`` string when the input,
        the vocabularies, or a model call is unusable.
    """
    model.eval()  # make sure dropout is disabled

    # --- Input preprocessing ---
    if not isinstance(sentence, str):
        return "[Error: Invalid Input Type]"

    src_table = src_vocab.stoi
    src_unk = src_table.get('<unk>', 0)  # falls back to 0 (usually PAD) if missing
    src_pad = src_table.get('<pad>', 0)
    if src_table.get('<sos>') is None or src_table.get('<eos>') is None:
        return "[Error: Bad Src Vocab]"

    src_ids = [src_table.get(tok, src_unk) for tok in ['<sos>', *sentence, '<eos>']]
    src_tensor = torch.LongTensor(src_ids).unsqueeze(0).to(device)  # (1, src_len)

    # --- Source mask: (1, 1, 1, src_len) ---
    src_mask = (src_tensor != src_pad).unsqueeze(1).unsqueeze(2).to(device)

    # --- Encode the source once ---
    with torch.no_grad():
        try:
            enc_output = model.encode(src_tensor, src_mask)  # (1, src_len, d_model)
        except Exception as e:
            print(f"Error during model encoding: {e}")
            return "[Error: Encoding Failed]"

    # --- Greedy decoding ---
    tgt_table = tgt_vocab.stoi
    sos_id = tgt_table.get('<sos>')
    eos_id = tgt_table.get('<eos>')
    tgt_pad = tgt_table.get('<pad>', 0)
    if sos_id is None or eos_id is None:
        return "[Error: Bad Tgt Vocab]"

    generated = [sos_id]
    for step in range(max_length):
        tgt_tensor = torch.LongTensor(generated).unsqueeze(0).to(device)  # (1, cur_len)
        cur_len = tgt_tensor.size(1)

        # Row-wise padding flags, (1, 1, cur_len, 1).
        not_pad = (tgt_tensor != tgt_pad).unsqueeze(1).unsqueeze(-1)
        # Lower-triangular look-ahead mask, (1, 1, cur_len, cur_len).
        causal = torch.tril(torch.ones(cur_len, cur_len, device=device)).bool()
        causal = causal.unsqueeze(0).unsqueeze(0)
        combined_mask = not_pad & causal

        with torch.no_grad():
            try:
                decoded = model.decode(tgt_tensor, enc_output, src_mask, combined_mask)
                # Only the newest position decides the next token.
                logits = model.fc_out(decoded[:, -1, :])
            except Exception as e:
                print(f"Error during model decoding step {step}: {e}")
                return "[Error: Decoding Failed]"

        next_id = logits.argmax(1).item()
        generated.append(next_id)
        if next_id == eos_id:
            break

    # --- Post-process: drop special tokens, map ids back to characters ---
    skipped = {tgt_table.get(tok, -999) for tok in ['<sos>', '<eos>', '<pad>']}
    id_to_token = tgt_vocab.itos
    return "".join(id_to_token.get(idx, '<unk>') for idx in generated if idx not in skipped)


test_sentences = [
    "Hello!",
    "How are you?",
    "This is a test.",
    "He plays football every weekend.",
    "She has a beautiful dog.",
    "The sun is shining brightly.",
    "I like to read books.",
    "They are going to the park.",
    "My favorite color is blue.",
    "We eat dinner at seven.",
    "The cat sleeps on the mat.",
    "Birds sing in the morning.",
    "He can swim very well.",
    "She writes a letter.",
    "The car is red.",
    "I see a big tree.",
    "They watch television.",
    "My brother is tall.",
    "We learn English at school.",
    "The flowers smell good.",
    "He drinks milk every day.",
    "She helps her mother.",
    "The book is on the table.",
    "I have two pencils.",
    "They live in a small house.",
    "My father works hard.",
    "We play games together.",
    "The moon is bright tonight.",
    "He wears a green shirt.",
    "She dances gracefully.",
    "The fish swims in the water.",
    "I want an apple.",
    "They visit their grandparents.",
    "My sister plays the piano.",
    "We go to bed early.",
    "The sky is clear.",
    "He listens to music.",
    "She draws a nice picture.",
    "The bus stops here.",
    "I feel happy today.",
    "They build a sandcastle.",
    "My friend is kind.",
    "We love to travel.",
    "The baby is crying.",
    "He eats an orange.",
    "She cleans her room.",
    "The door is open.",
    "I can ride a bike.",
    "They run in the field.",
    "My teacher is helpful.",
    "We study science.",
    "The stars are far away.",
    "He tells a funny story.",
    "She wears a pretty dress.",
    "The train is fast.",
    "I understand the lesson.",
    "They sing a happy song.",
    "My shoes are new.",
    "We walk to the store.",
    "The food is delicious.",
    "He reads a newspaper.",
    "She looks at the birds.",
    "The window is closed.",
    "I need some water.",
    "They plant a tree.",
    "My dog likes to play fetch.",
    "We visit the museum.",
    "The weather is warm.",
    "He fixes the broken toy.",
    "She calls her friend.",
    "The grass is green.",
    "I like ice cream.",
    "They go on a holiday.",
    "My mother cooks tasty food.",
    "We have a picnic.",
    "The river flows slowly.",
    "He throws the ball.",
    "She smiles at me.",
    "The mountain is high.",
    "I lost my key.",
    "They help the old man.",
    "My garden is beautiful.",
    "We share our toys.",
    "The answer is simple.",
    "He drives a blue car.",
    "She paints a landscape.",
    "The clock is on the wall.",
    "I am learning to code.",
    "They make a snowman.",
    "My homework is easy.",
    "We clean the house.",
    "The bird has a nest.",
    "He catches a fish.",
    "She studies for the exam.",
    "The bridge is long.",
    "I want to sleep.",
    "They are good friends.",
    "My cat is very playful.",
    "We are going to the beach.",
    "The coffee is hot.",
    "He gives her a gift.",
]

print("\n--- Starting Translation Examples ---")
# Translate every example, printing one separator line before each entry
# and a final one after the last.
for sentence in test_sentences:
    translation = None
    print("-" * 20)
    print(f"Input:      {sentence}")
    translation = translate(
        sentence,
        model,
        src_vocab,
        tgt_vocab,
        DEVICE,
        max_length=MAX_LENGTH,
    )
    print(f"Translation: {translation}")
print("-" * 20)
print("Prediction finished.")

predict.py运行结果展示：

root@autodl-container-de94439c34-d719190d:~# python predict.py
Using device: cpu
Loading checkpoint from: best_model_subset.pth
Checkpoint loaded successfully.
Model configuration loaded from checkpoint.
Source vocab size: 2776
Target vocab size: 8209
Model initialized.
The model has 10,451,473 total parameters.
Model state loaded successfully.

--- Starting Translation Examples ---
--------------------
Input:      Hello!
Translation: 你好！
--------------------
Input:      How are you?
Translation: 你怎么样？
--------------------
Input:      This is a test.
Translation: 这是一个测试。
--------------------
Input:      He plays football every weekend.
Translation: 他每周都踢足球。
--------------------
Input:      She has a beautiful dog.
Translation: 她有一只美丽的狗。
--------------------
Input:      The sun is shining brightly.
Translation: 太阳光明亮了。
--------------------
Input:      I like to read books.
Translation: 我喜欢读书。
--------------------
Input:      They are going to the park.
Translation: 他们正在去公园。
--------------------
Input:      My favorite color is blue.
Translation: 我最喜欢的颜色是蓝色。
--------------------
Input:      We eat dinner at seven.
Translation: 我们吃晚饭。
--------------------
Input:      The cat sleeps on the mat.
Translation: 猫睡在垫上。
--------------------
Input:      Birds sing in the morning.
Translation: 鸟在早晨唱歌。
--------------------
Input:      He can swim very well.
Translation: 他可以很好地游泳。
--------------------
Input:      She writes a letter.
Translation: 她写信。
--------------------
Input:      The car is red.
Translation: 车是红色的。
--------------------
Input:      I see a big tree.
Translation: 我看见一棵大树。
--------------------
Input:      They watch television.
Translation: 他们看电视。
--------------------
Input:      My brother is tall.
Translation: 我的哥哥高。
--------------------
Input:      We learn English at school.
Translation: 我们学习英语。
--------------------
Input:      The flowers smell good.
Translation: 花香气味好。
--------------------
Input:      He drinks milk every day.
Translation: 他每天喝牛奶。
--------------------
Input:      She helps her mother.
Translation: 她帮忙妈妈。
--------------------
Input:      The book is on the table.
Translation: 这本书是桌子上的。
--------------------
Input:      I have two pencils.
Translation: 我有两个铅笔。
--------------------
Input:      They live in a small house.
Translation: 他们住在一个小房子里。
--------------------
Input:      My father works hard.
Translation: 我爸爸爸很努力。
--------------------
Input:      We play games together.
Translation: 我们玩游戏。
--------------------
Input:      The moon is bright tonight.
Translation: 月亮今晚是明亮的。
--------------------
Input:      He wears a green shirt.
Translation: 他穿着绿色的衬衫。
--------------------
Input:      She dances gracefully.
Translation: 她很喜欢跳舞。
--------------------
Input:      The fish swims in the water.
Translation: 鱼在水里游泳。
--------------------
Input:      I want an apple.
Translation: 我想要一个苹果。
--------------------
Input:      They visit their grandparents.
Translation: 他们访问他们的祖父母。
--------------------
Input:      My sister plays the piano.
Translation: 我的妹妹打钢琴。
--------------------
Input:      We go to bed early.
Translation: 我们早些时候睡觉。
--------------------
Input:      The sky is clear.
Translation: 天空清晰。
--------------------
Input:      He listens to music.
Translation: 他听音乐。
--------------------
Input:      She draws a nice picture.
Translation: 她画了一张美丽的照片。
--------------------
Input:      The bus stops here.
Translation: 公共汽车停下来。
--------------------
Input:      I feel happy today.
Translation: 今天我感到快乐。
--------------------
Input:      They build a sandcastle.
Translation: 他们建造了一个沙子。
--------------------
Input:      My friend is kind.
Translation: 我的朋友是个好的。
--------------------
Input:      We love to travel.
Translation: 我们喜欢旅行。
--------------------
Input:      The baby is crying.
Translation: 这个宝宝正在哭泣。
--------------------
Input:      He eats an orange.
Translation: 他吃了一个橙色。
--------------------
Input:      She cleans her room.
Translation: 她洁净房间。
--------------------
Input:      The door is open.
Translation: 门开了。
--------------------
Input:      I can ride a bike.
Translation: 我可以骑自行车。
--------------------
Input:      They run in the field.
Translation: 他们在田里跑。
--------------------
Input:      My teacher is helpful.
Translation: 老师很有帮助。
--------------------
Input:      We study science.
Translation: 我们研究科学。
--------------------
Input:      The stars are far away.
Translation: 星星远远远。
--------------------
Input:      He tells a funny story.
Translation: 他告诉一个有趣的故事。
--------------------
Input:      She wears a pretty dress.
Translation: 她穿着一件衣服。
--------------------
Input:      The train is fast.
Translation: 火车快速。
--------------------
Input:      I understand the lesson.
Translation: 我理解课程。
--------------------
Input:      They sing a happy song.
Translation: 他们唱了一首快乐的歌。
--------------------
Input:      My shoes are new.
Translation: 我的鞋子是新的。
--------------------
Input:      We walk to the store.
Translation: 我们走到商店。
--------------------
Input:      The food is delicious.
Translation: 食物是美味的。
--------------------
Input:      He reads a newspaper.
Translation: 他读了一篇报纸。
--------------------
Input:      She looks at the birds.
Translation: 她看着鸟儿。
--------------------
Input:      The window is closed.
Translation: 窗户闭上了。
--------------------
Input:      I need some water.
Translation: 我需要一些水。
--------------------
Input:      They plant a tree.
Translation: 他们种了树。
--------------------
Input:      My dog likes to play fetch.
Translation: 我的狗喜欢玩耍。
--------------------
Input:      We visit the museum.
Translation: 我们访问博物馆。
--------------------
Input:      The weather is warm.
Translation: 天气暖暖。
--------------------
Input:      He fixes the broken toy.
Translation: 他把玩具固定了。
--------------------
Input:      She calls her friend.
Translation: 她打电话给她的朋友。
--------------------
Input:      The grass is green.
Translation: 草是绿色的。
--------------------
Input:      I like ice cream.
Translation: 我喜欢冰淇淋。
--------------------
Input:      They go on a holiday.
Translation: 他们一天去度假。
--------------------
Input:      My mother cooks tasty food.
Translation: 妈妈的菜吃了香味。
--------------------
Input:      We have a picnic.
Translation: 我们有一个野餐。
--------------------
Input:      The river flows slowly.
Translation: 河流慢慢慢。
--------------------
Input:      He throws the ball.
Translation: 他把球扔了。
--------------------
Input:      She smiles at me.
Translation: 她笑着我。
--------------------
Input:      The mountain is high.
Translation: 山高。
--------------------
Input:      I lost my key.
Translation: 我丢了我的钥匙。
--------------------
Input:      They help the old man.
Translation: 他们帮助老人。
--------------------
Input:      My garden is beautiful.
Translation: 我的花园很美丽。
--------------------
Input:      We share our toys.
Translation: 我们分享我们的玩具。
--------------------
Input:      The answer is simple.
Translation: 答案简单。
--------------------
Input:      He drives a blue car.
Translation: 他驾驶蓝色的车。
--------------------
Input:      She paints a landscape.
Translation: 她画了一幅景观。
--------------------
Input:      The clock is on the wall.
Translation: 钟声在墙上。
--------------------
Input:      I am learning to code.
Translation: 我学习代码。
--------------------
Input:      They make a snowman.
Translation: 他们制造雪人。
--------------------
Input:      My homework is easy.
Translation: 我的家庭工作很容易。
--------------------
Input:      We clean the house.
Translation: 我们清洁房子。
--------------------
Input:      The bird has a nest.
Translation: 鸟儿有巢。
--------------------
Input:      He catches a fish.
Translation: 他抓了一只鱼。
--------------------
Input:      She studies for the exam.
Translation: 她对考试进行研究。
--------------------
Input:      The bridge is long.
Translation: 桥长。
--------------------
Input:      I want to sleep.
Translation: 我想睡得。
--------------------
Input:      They are good friends.
Translation: 他们是好朋友。
--------------------
Input:      My cat is very playful.
Translation: 我的猫是非常有趣的。
--------------------
Input:      We are going to the beach.
Translation: 我们要到海滩上去。
--------------------
Input:      The coffee is hot.
Translation: 咖啡是热的。
--------------------
Input:      He gives her a gift.
Translation: 他给她一个礼物。
--------------------
Prediction finished.

从零实现基于Transformer的英译汉任务

1. model.py（用的是上一篇文章的代码：从0搭建Transformer-CSDN博客） import torch import torch.nn as nn import math class PositionalEncoding(nn.Module): def __init__ (self, d_model, dropout, max_len=5000): super(PositionalEncoding,…...

编程日记 2025/8/20 17:31:16

在 PyTorch 中借助 GloVe 词嵌入完成情感分析

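一. Glove 词嵌入原理 GloVe是一种学习词嵌入的方法，它希望拟合给定上下文单词i时单词j出现的次数$X_{ij}$。使用的误差函数为： $J=\sum_{i=1}^{N}\sum_{j=1}^{N} f(X_{ij})\left(\theta_i^{\top} e_j + b_i + b'_j - \log X_{ij}\right)^2$ 其中N是词汇表大小，$\theta, b$是线性层参数，$e$是词嵌入。f(x)是权重项，用于平衡不同频率的单词对误差的影响…...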

编程日记 2025/8/20 17:26:29

大数据应用开发和项目实战-电商双11美妆数据分析

数据初步了解 （head出现，意味着只出现前5行，如果只出现后面几行就是tail） info shape describe 数据清洗重复值处理这个重复值是否去掉要看实际情况，比如说：昨天卖了5瓶七喜，今天卖了5瓶七…...

编程日记 2025/8/17 8:31:21

web服务

一、nginx的安装与启用 nginx的安装开源版本的Nginx官网：http://nginx.org Nginx在安装的过程中可以选择源码安装也可以选择使用软件包安装源码安装下载相应的源码压缩包解压后编译完成安装软件安装包可以使用rpm或者apt命令进行安装，也可以使用dnf…...

编程日记 2025/8/20 17:28:19

在Spring Boot 中如何配置MongoDB的副本集 (Replica Set) 或分片集群 (Sharded Cluster)？

在 Spring Boot 中配置 MongoDB 副本集 (Replica Set) 或分片集群 (Sharded Cluster) 非常相似，主要区别在于连接字符串 (URI) 中提供的主机列表和一些特定选项。最常用的方式是使用 spring.data.mongodb.uri 属性配置连接字符串。 1. 连接到 MongoDB 副本集 (Repl…...

编程日记 2025/8/18 7:46:10

Oracle中游标和集合的定义查询及取值

在 Oracle 存储过程中，使用游标处理自定义数据行类型时，可以通过定义记录类型（RECORD） 和游标（CURSOR） 结合实现。 1. 定义自定义记录类型使用 TYPE … IS RECORD 定义自定义行数据类型： DE…...

编程日记 2025/8/20 17:24:40

Python企业级MySQL数据库开发实战指南

简介 Python与MySQL的完美结合是现代Web应用和数据分析系统的基石，能够创建高效稳定的企业级数据库解决方案。本文将从零开始，全面介绍如何使用Python连接MySQL数据库，设计健壮的表结构，实现CRUD操作，并掌握连接池管理、事务处理、批量操作和防止SQL注入等企业级开发核心…...

编程日记 2025/8/17 16:57:37

【LLM】Open WebUI 使用指南：详细图文教程

Open WebUI 是一个开源的、可扩展且用户友好的自托管 AI 平台，专为生成式人工智能模型交互而设计。 Open WebUI 旨在为用户提供一个简单易用、功能强大且高度定制化的界面，使其能够轻松与各种 AI 模型（如文本生成、图像生成、语音识别等）进行交互。一、安装与初始化配置扩…...

编程日记 2025/8/20 4:31:49

前端封装框架依赖管理全攻略：构建轻量可维护的私有框架

前端封装框架依赖管理全攻略：构建轻量可维护的私有框架前言在自研前端框架的开发中，依赖管理是决定框架可用性的关键因素。不合理的依赖设计会导致： 项目体积膨胀：重复依赖使最终打包体积增加30%版本地狱：不同项目…...

编程日记 2025/8/20 17:24:42

List remove数据时报错：Caused by: java.lang.UnsupportedOperationException

看了二哥的foreach陷阱后，自己也遇见了需要循环删除元素的情况，立马想到了当时自己阴差阳错的避开所有坑的解决方式：先倒序遍历，再删除。之前好使，但是这次不好使了，报错Caused by: java.lang.UnsupportedO…...

编程日记 2025/8/20 17:29:51

互联网大厂Java求职面试：云原生与AI融合下的系统设计挑战-1

互联网大厂Java求职面试：云原生与AI融合下的系统设计挑战-1 在当今云计算和人工智能迅猛发展的背景下，互联网大厂对Java工程师的要求已从传统的单体架构和业务逻辑处理，转向了更复杂的云原生架构设计、AI模型集成以及高并发系统的性能优化能…...

编程日记 2025/8/20 17:28:19

并发设计模式实战系列(16)：屏障（Barrier）

🌟 大家好，我是摘星！ 🌟 今天为大家带来的是并发设计模式实战系列，第十六章屏障（Barrier），废话不多说直接开始~ 目录一、核心原理深度拆解 1. 屏障的同步机制 2. 关键参数二…...

编程日记 2025/8/18 22:10:32

pywinauto通过图片定位怎么更加精准的识别图片？

pywinauto通过图片定位怎么更加精准的识别图片？ 可以使用置信度的配置，添加了对比图片相似程度达到多少就可以认为是合适的定位图片 import time from time import sleep from pywinauto.application import Application from pywinauto.keyboard impo…...

编程日记 2025/8/20 17:28:21

Spring Cloud Stream集成RocketMQ（kafka/rabbitMQ通用）

什么是Spring Cloud Stream Spring Cloud Stream 是 Spring 生态系统中的一个框架，用于简化构建消息驱动微服务的开发和集成。它通过抽象化的方式将消息中间件（如 RabbitMQ、Kafka、RocketMQ 等）的复杂通信逻辑封装成简单的编程模型…...

编程日记 2025/8/20 17:29:53

基于docker使用showdoc搭建API开发文档服务器

以下是基于 Docker 快速搭建 ShowDoc API 文档服务器的完整指南，包含优化配置和常见问题解决方案： 1. 快速部署方案 # 创建数据目录（确保权限） mkdir -p /showdoc_data/html && chmod 777 -R /showdoc_data# 一键启动容器…...

编程日记 2025/8/20 12:24:37

Vision-Language Models (VLMs) 视觉语言模型的技术背景、应用场景和商业前景（Grok3 DeepSearch模式回答）

prompt: 你是一位文笔精湛、十分专业的技术博客作者，你将从技术背景、应用场景和商业前景等多个维度去向读者介绍Vision-Language Models 关键要点研究表明，视觉语言模型（VLMs）是多模态AI系统，能同时处理视觉和文本数…...

编程日记 2025/8/18 10:12:44

OpenAI大变革！继续与微软等，以非营利模式冲击AGI

今天凌晨2点，OpenAI宣布，将继续由非营利组织控制；现有的营利性实体将转变为一家公共利益公司；非营利组织将控制该公共利益公司，并成为其重要的持股方。这也就是说OpenAI曾在去年提到的由非营利性转变成营利性公司…...

编程日记 2025/8/20 7:23:56

Ubuntu打开中文文本乱码

文章目录中文乱码问题修复乱码系统字符编码修改文本编码修改vim乱码 utf-8编码原理特点应用场景与其他编码的转换 iso-8859-1基本信息字符涵盖应用场景与其他编码的关系 ubuntu打开文本出现乱码，可能是编码没设置对。中文乱码问题使用vim打开文本，或…...

编程日记 2025/8/20 12:06:42

车载通信网络安全：挑战与解决方案

1. 简介当今时代见证了车载汽车技术的巨大发展，因为现代智能汽车可以被视为具有出色外部基础设施连接能力的信息物理系统 [ 1 ]。车载技术支持的现代智能汽车不应被视为类似于机械系统，而是由数百万行复杂代码组成的集成架构，可为车内乘客提…...

编程日记 2025/8/18 11:11:06

【Linux系统】读写锁

读者写者问题重点读者写者问题是并发编程中的经典问题，主要研究多个进程或线程对共享数据进行读和写操作时如何实现同步和互斥，以保证数据的一致性和操作的正确性。问题核心要点同步与互斥：需要确保多个读者可以同时读共享数据…...

编程日记 2025/8/18 13:26:17

springBoot中自定义一个validation注解，实现指定枚举值校验

缘由在后台写接口的时候，经常会出现dto某个属性是映射到一个枚举的情况。有时候还会出现只能映射到枚举类中部分枚举值的情况。以前都是在service里面自行判断，很多地方代码冗余，所以就想着弄一个自定义的validation注解来实现。例如下面某…...

编程日记 2025/8/17 17:59:34

【Python】--装饰器

装饰器（Decorator）本质上是一个返回函数的函数，主要作用是：在不修改原函数代码的前提下，给函数增加额外的功能，比如：增加业务、日志记录、权限验证、执行时间统计、缓存等场景。 @my_decorator def func(): pas…...

编程日记 2025/8/18 10:12:28

排序算法——堆排序

一、介绍「堆排序heapsort」是一种基于堆数据结构实现的高效排序算法。我们可以利用已经学过的“建堆操作”和“元素出堆操作”实现堆排序。 1. 输入数组并建立小顶堆，此时最小元素位于堆顶。 2. 不断执行出堆操作，依次记录出堆元素，即可得…...

编程日记 2025/8/18 10:12:26

Day111 | 灵神 | 二叉树 | 验证二叉搜索树

Day111 | 灵神 | 二叉树 | 验证二叉搜索树 98.验证二叉搜索树 98. 验证二叉搜索树 - 力扣（LeetCode） 方法一：前序遍历递归函数传入合法的左右边界，只有当前结点是合法的边界，才是二叉搜索树，否则就返回…...

编程日记 2025/8/20 2:17:42

软考-软件设计师中级备考 13、刷题数据结构

倒计时17天时间不多了，数据库、UML、等知识点有基础直接略过，法律全靠考前的一两天刷题，英语直接放弃。一、数据结构：链表、栈、队列、数组、哈希表、树、图 1、关于链表操作，说法正确的是： A)新增一个头…...

编程日记 2025/8/20 2:03:23

【5G通信】天线调整

在天线工程中，机械下倾角、电子下倾角和数字下倾角是调整天线波束指向的不同技术手段，其核心区别在于实现方式和灵活性： 1. 机械下倾角（Mechanical Downtilt） 定义：通过物理调整天线的安装角度，…...

编程日记 2025/8/19 14:27:34

Kafka的Log Compaction原理是什么？

Kafka的Log Compaction（日志压缩）是一种独特的数据保留策略，其核心原理是保留每个key的最新有效记录。以下是关键原理分点说明： 1. 键值保留机制通过扫描所有消息的key，仅保留每个key对应的最新value值。例如…...

编程日记 2025/8/18 7:07:11

嵌入式面试八股文（十四）·内存管理机制、优先级继承机制以及优先级翻转

目录 1. 内存管理算法（五种内存管理机制） 1.1 heap_1.c 1.2 heap_2.c 1.3 heap_3.c 1.4 heap_4.c 1.5 heap_5.c 1.6 总结 2. STM32通知寄存器有哪些？ 2.1 核心寄存器组（Cortex-M） 2.2 特殊功能寄存…...

编程日记 2025/8/18 10:12:13

深度剖析：可视化如何重塑驾驶舱信息交互模式

为什么你开车时总觉得“信息太多却抓不住重点”？ 今天的汽车早已不是单纯的交通工具，而是一个高度集成的信息终端。从导航、油耗、胎压到自动驾驶提示，各种数据不断涌进驾驶舱。但问题也随之而来： 关键信息被淹没在一堆图标里…...

编程日记 2025/8/17 11:00:14

app根据蓝牙名字不同，匹配不同的产品型号，显示对应的UI界面

在开发一个 App 时，如果希望根据蓝牙设备名称（Bluetooth Name）的不同，自动匹配不同的产品型号，并显示对应的 UI 界面，可以按照以下思路来实现： ✅ 功能目标扫描并连接蓝牙设备；获取…...

编程日记 2025/8/20 2:03:25

数据结构 --- 栈

1.栈的初始化 2.入栈 3.出栈 4.取出栈顶元素 5.获取栈中有效元素个数 6.栈的销毁栈：一种特殊的线性表，其只允许在固定的一端进行插入和删除元素操作。进行数据插入和删除操作的一端称为栈顶，另一端称为栈底。栈中的数据元素遵守后进先…...

编程日记 2025/8/19 17:03:36

37-算法打卡-栈与队列-滑动窗口最大值-leetcode(239)-第三十七天

1 题目地址 239. 滑动窗口最大值 - 力扣（LeetCode）239. 滑动窗口最大值 - 给你一个整数数组 nums，有一个大小为 k 的滑动窗口从数组的最左侧移动到数组的最右侧。你只可以看到在滑动窗口内的 k 个数字。滑动窗口每次只向右移动一位。返回滑…...

编程日记 2025/8/18 10:12:02

【原创分享】魔音变声器内含超多语音包实时变声

魔音变声器,一款专业的调音变声器软件亲测可使用所有功能[真棒] 去除所有广告 ————————————【下载地址】———————————— 【获取方法1】：https://pan.xunlei.com/s/VOP_TXtKNlevTgYvIlxmmJquA1?pwd8vpi# ————————————【下 …...

编程日记 2025/8/19 15:09:21

数据结构（一）——线性表的顺序表示和实现

一、线性表的定义由n(n≥0)个数据特性相同的元素构成的有限序列称为线性表，(n=0)的时候被称为空表。一个数据元素可以是简单的一个数据，一个符号，也可以是复杂的若干个数据项的组合。二、线性表的类型定义 线性表是由n(n≥0)个相同类…...

编程日记 2025/8/18 10:11:58

Winform（12.控件讲解）

ChildForm窗口： ChildForm代码： using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.Linq; using System.Text; using System.Threading.Tasks; using System.Windows.Forms; namespac…...

编程日记 2025/8/18 10:11:55

1. model.py（用的是上一篇文章的代码：从0搭建Transformer-CSDN博客）

2. train.py（数据量很大，使用其中一部分进行训练和验证，数据集来源：中英互译数据集(translation2019zh)_数据集-飞桨AI Studio星河社区）

3. predict.py（模型预测）

predict.py运行结果展示：

相关文章：