PyTorch Deep Learning in Action (23): Extending Multi-Task Reinforcement Learning (Multi-Task RL)

This post builds on the earlier "PyTorch Deep Learning in Action (23): Multi-Task Reinforcement Learning (Multi-Task RL)" and extends it. The full extended code is as follows:

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.distributions import Normal
from torch.amp import autocast, GradScaler
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
import time
from collections import deque


# ================== Configuration ==================
class MultiTaskPPOConfig:
    # Task configuration
    task_names = [
        'reach-v2-goal-observable',
        'push-v2-goal-observable',
        'pick-place-v2-goal-observable'
    ]
    num_tasks = 3

    # Network architecture
    shared_dim = 512
    task_specific_dim = 256
    meta_controller_dim = 128
    shared_layers = 2
    task_specific_layers = 1

    # Training parameters
    lr = 5e-5
    meta_lr = 1e-5
    gamma = 0.99
    gae_lambda = 0.97
    clip_epsilon = 0.15
    ppo_epochs = 5
    batch_size = 4096
    max_episodes = 10000
    max_steps = 200
    grad_clip = 0.5
    entropy_coef = 0.1

    # Exploration parameters
    initial_std = 1.5
    min_std = 0.2
    std_decay = 0.999

    # Curriculum learning schedule
    curriculum_schedule = {
        0: ['reach-v2-goal-observable'],
        1000: ['reach-v2-goal-observable', 'push-v2-goal-observable'],
        3000: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'],
        6000: ['reach-v2-goal-observable', 'push-v2-goal-observable', 'pick-place-v2-goal-observable']
    }

    # Monitoring configuration
    log_interval = 50
    eval_interval = 500
    eval_episodes = 10
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# ================== MetaController ==================
class MetaController(nn.Module):
    def __init__(self, num_tasks, state_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, MultiTaskPPOConfig.meta_controller_dim),
            nn.LayerNorm(MultiTaskPPOConfig.meta_controller_dim),
            nn.GELU(),
            nn.Linear(MultiTaskPPOConfig.meta_controller_dim, num_tasks)
        )
        # Initialize parameters
        for layer in self.net:
            if isinstance(layer, nn.Linear):
                nn.init.orthogonal_(layer.weight, gain=0.01)
                nn.init.constant_(layer.bias, 0.0)

    def forward(self, state):
        logits = self.net(state)
        return torch.softmax(logits, -1), logits


# ================== Shared Policy Network ==================
class SharedPolicy(nn.Module):
    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.action_dim = action_dim
        self.current_std = MultiTaskPPOConfig.initial_std

        # Shared layers
        self.shared_net = nn.Sequential(
            nn.Linear(state_dim, MultiTaskPPOConfig.shared_dim),
            nn.LayerNorm(MultiTaskPPOConfig.shared_dim),
            nn.GELU(),
            nn.Linear(MultiTaskPPOConfig.shared_dim, MultiTaskPPOConfig.shared_dim),
            nn.GELU()
        )

        # Multi-task heads
        self.task_heads = nn.ModuleList()
        self.value_heads = nn.ModuleList()
        for _ in range(MultiTaskPPOConfig.num_tasks):
            # Action head
            task_head = nn.Sequential(
                nn.Linear(MultiTaskPPOConfig.shared_dim, MultiTaskPPOConfig.task_specific_dim),
                nn.GELU(),
                nn.Linear(MultiTaskPPOConfig.task_specific_dim, action_dim)
            )
            self.task_heads.append(task_head)

            # Value head
            value_head = nn.Sequential(
                nn.Linear(MultiTaskPPOConfig.shared_dim, MultiTaskPPOConfig.task_specific_dim),
                nn.GELU(),
                nn.Linear(MultiTaskPPOConfig.task_specific_dim, 1)
            )
            self.value_heads.append(value_head)

        # Learnable log standard deviation
        self.log_std = nn.Parameter(torch.zeros(1, action_dim))

        # Initialize parameters
        self._init_weights()

    def _init_weights(self):
        for head in self.task_heads:
            for layer in head:
                if isinstance(layer, nn.Linear):
                    nn.init.orthogonal_(layer.weight, gain=0.01)
                    nn.init.constant_(layer.bias, 0.0)
        for head in self.value_heads:
            for layer in head:
                if isinstance(layer, nn.Linear):
                    nn.init.orthogonal_(layer.weight, gain=1.0)
                    nn.init.constant_(layer.bias, 0.0)

    def decay_action_std(self):
        """Decay the action standard deviation."""
        self.current_std = max(
            self.current_std * MultiTaskPPOConfig.std_decay,
            MultiTaskPPOConfig.min_std
        )

    def forward(self, states, task_ids):
        # Ensure inputs are float32
        states = states.float() if states.dtype != torch.float32 else states
        shared_features = self.shared_net(states)
        batch_size = states.size(0)

        # Initialize output tensors
        action_means = torch.zeros(
            batch_size, self.action_dim,
            dtype=torch.float32,
            device=states.device
        )
        action_stds = torch.exp(self.log_std).expand(batch_size, -1) * self.current_std
        values = torch.zeros(
            batch_size, 1,
            dtype=torch.float32,
            device=states.device
        )

        unique_task_ids = torch.unique(task_ids)
        for task_id_tensor in unique_task_ids:
            task_id = task_id_tensor.item()
            mask = (task_ids == task_id_tensor)
            if not mask.any():
                continue
            selected_features = shared_features[mask]

            # Compute task-specific outputs
            with autocast(device_type=states.device.type, enabled=False):  # disable mixed precision
                task_action = self.task_heads[task_id](selected_features.float())
                task_value = self.value_heads[task_id](selected_features.float())

            action_means[mask] = task_action
            values[mask] = task_value

        return action_means, action_stds, values


# ================== Training System ==================
class EnhancedMultiTaskPPOTrainer:
    def __init__(self):
        # Initialize multi-task environments
        self.envs = []
        self.state_dim = None
        self.action_dim = None

        # Validate environments and record dimensions
        for task_name in MultiTaskPPOConfig.task_names:
            env = ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[task_name]()
            obs, _ = env.reset()
            if self.state_dim is None:
                self.state_dim = obs.shape[0]
                self.action_dim = env.action_space.shape[0]
            else:
                assert obs.shape[0] == self.state_dim, f"Inconsistent state dimension: {task_name}"
            self.envs.append(env)

        # Initialize policy network
        self.policy = SharedPolicy(self.state_dim, self.action_dim).to(MultiTaskPPOConfig.device)
        self.optimizer = optim.AdamW(self.policy.parameters(), lr=MultiTaskPPOConfig.lr)
        self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer,
            T_max=MultiTaskPPOConfig.max_episodes,
            eta_min=1e-6
        )
        self.scaler = GradScaler(enabled=MultiTaskPPOConfig.device.type == 'cuda')

        # Initialize the MetaController
        self.meta_controller = MetaController(
            MultiTaskPPOConfig.num_tasks,
            self.state_dim
        ).to(MultiTaskPPOConfig.device)
        self.meta_optimizer = optim.Adam(
            self.meta_controller.parameters(),
            lr=MultiTaskPPOConfig.meta_lr
        )

        # Initialize the experience buffer
        self.buffer = deque(maxlen=MultiTaskPPOConfig.max_steps)

        # Curriculum learning state
        self.current_phase = 0
        self.phase_thresholds = sorted(MultiTaskPPOConfig.curriculum_schedule.keys())

        # Training statistics
        self.episode_rewards = {i: deque(maxlen=100) for i in range(MultiTaskPPOConfig.num_tasks)}
        self.episode_lengths = {i: deque(maxlen=100) for i in range(MultiTaskPPOConfig.num_tasks)}
        self.meta_data = {
            'states': [],
            'chosen_tasks': [],
            'rewards': []
        }

        # Evaluation statistics
        self.eval_rewards = {i: [] for i in range(MultiTaskPPOConfig.num_tasks)}
        self.eval_success = {i: [] for i in range(MultiTaskPPOConfig.num_tasks)}

    def get_current_tasks(self, episode):
        """Get the task list for the current curriculum phase."""
        if len(self.phase_thresholds) > 1 and self.current_phase < len(self.phase_thresholds) - 1:
            if episode >= self.phase_thresholds[self.current_phase + 1]:
                self.current_phase += 1
        task_names = MultiTaskPPOConfig.curriculum_schedule[self.phase_thresholds[self.current_phase]]
        return [MultiTaskPPOConfig.task_names.index(name) for name in task_names]

    def collect_experience(self, num_steps, episode):
        """Experience collection integrating curriculum learning and the meta controller."""
        current_tasks = self.get_current_tasks(episode)
        for _ in range(num_steps):
            # Randomly pick a base task from the current curriculum tasks
            base_task_id = np.random.choice(current_tasks)
            env = self.envs[base_task_id]
            if not hasattr(env, '_last_obs'):
                state, _ = env.reset()
            else:
                state = env._last_obs

            # MetaController adjustment
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(MultiTaskPPOConfig.device)
            with torch.no_grad():
                task_probs, _ = self.meta_controller(state_tensor)
                task_probs = task_probs.squeeze().cpu().numpy()

            # Filter the probability distribution
            mask = np.zeros_like(task_probs)
            mask[current_tasks] = 1
            filtered_probs = task_probs * mask
            filtered_probs = filtered_probs / (filtered_probs.sum() + 1e-6)

            # Task selection strategy
            if np.random.rand() < 0.7:
                task_id = np.random.choice(current_tasks, p=filtered_probs[current_tasks])
            else:
                task_id = np.random.choice(current_tasks)

            # Record the meta controller decision
            self.meta_data['states'].append(state_tensor)
            self.meta_data['chosen_tasks'].append(task_id)

            # Execute the selected task
            env = self.envs[task_id]
            with torch.no_grad():
                task_id_tensor = torch.tensor([task_id], dtype=torch.long, device=MultiTaskPPOConfig.device)
                action_mean, action_std, value = self.policy(state_tensor, task_id_tensor)
                dist = Normal(action_mean.float(), action_std.float())  # ensure distribution parameters are float32
                action = dist.sample().squeeze(0)
                log_prob = dist.log_prob(action).sum(-1, keepdim=True)

            action_np = action.cpu().numpy()
            next_state, reward, done, trunc, info = env.step(action_np)

            # Record the transition
            self.buffer.append({
                'state': state,
                'action': action_np,
                'log_prob': log_prob.cpu(),
                'reward': float(reward),
                'done': bool(done),
                'task_id': task_id,
                'value': float(value.item()),
                'success': info.get('success', False)
            })

            # Record feedback for the meta controller
            self.meta_data['rewards'].append(reward)

            state = next_state if not (done or trunc) else env.reset()[0]

    def compute_gae(self, values, rewards, dones):
        """Compute Generalized Advantage Estimation (GAE)."""
        advantages = []
        last_advantage = 0
        next_value = 0
        next_non_terminal = 1.0
        for t in reversed(range(len(rewards))):
            delta = rewards[t] + MultiTaskPPOConfig.gamma * next_value * next_non_terminal - values[t]
            last_advantage = delta + MultiTaskPPOConfig.gamma * MultiTaskPPOConfig.gae_lambda * next_non_terminal * last_advantage
            advantages.append(last_advantage)
            next_value = values[t]
            next_non_terminal = 1.0 - dones[t]
        advantages = torch.tensor(advantages[::-1], dtype=torch.float32).to(MultiTaskPPOConfig.device)
        returns = advantages + torch.tensor(values, dtype=torch.float32).to(MultiTaskPPOConfig.device)
        return (advantages - advantages.mean()) / (advantages.std() + 1e-8), returns

    def calculate_task_weights(self):
        """Compute task weights based on recent performance."""
        task_weights = torch.ones(MultiTaskPPOConfig.num_tasks, device=MultiTaskPPOConfig.device)
        for task_id in range(MultiTaskPPOConfig.num_tasks):
            if len(self.episode_rewards[task_id]) > 10:
                # Success rate over the last 10 episodes
                recent_rewards = list(self.episode_rewards[task_id])[-10:]
                success_rate = sum(1 for r in recent_rewards if r > 0) / len(recent_rewards)
                # Dynamically adjust weights
                if success_rate < 0.3:
                    task_weights[task_id] = 2.0  # double the weight of hard tasks
                elif success_rate > 0.8:
                    task_weights[task_id] = 0.5  # halve the weight of easy tasks
        return task_weights / task_weights.sum()

    def update_meta_controller(self):
        """Update the task selection policy."""
        if len(self.meta_data['states']) == 0:
            return

        states = torch.cat(self.meta_data['states'])
        chosen_tasks = torch.tensor(self.meta_data['chosen_tasks'], device=MultiTaskPPOConfig.device)
        rewards = torch.tensor(self.meta_data['rewards'], dtype=torch.float32, device=MultiTaskPPOConfig.device)

        # Clear the buffered decisions
        self.meta_data = {'states': [], 'chosen_tasks': [], 'rewards': []}

        # Normalize rewards
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-6)

        # Update the MetaController
        task_probs, logits = self.meta_controller(states)
        selected_probs = task_probs.gather(1, chosen_tasks.unsqueeze(1))
        loss = -torch.log(selected_probs + 1e-6) * rewards.unsqueeze(1)
        loss = loss.mean()

        self.meta_optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.meta_controller.parameters(), MultiTaskPPOConfig.grad_clip)
        self.meta_optimizer.step()

    def update_policy(self):
        """Policy update."""
        if not self.buffer:
            return 0, 0, 0

        # Extract data from the buffer
        batch = list(self.buffer)
        states = torch.tensor([x['state'] for x in batch], dtype=torch.float32, device=MultiTaskPPOConfig.device)
        actions = torch.FloatTensor(np.array([x['action'] for x in batch])).to(MultiTaskPPOConfig.device)
        old_log_probs = torch.cat([x['log_prob'] for x in batch]).to(MultiTaskPPOConfig.device)
        rewards = torch.FloatTensor([x['reward'] for x in batch]).to(MultiTaskPPOConfig.device)
        dones = torch.FloatTensor([x['done'] for x in batch]).to(MultiTaskPPOConfig.device)
        task_ids = torch.tensor([x['task_id'] for x in batch], dtype=torch.long, device=MultiTaskPPOConfig.device)
        values = torch.FloatTensor([x['value'] for x in batch]).to(MultiTaskPPOConfig.device)
        successes = torch.FloatTensor([x['success'] for x in batch]).to(MultiTaskPPOConfig.device)

        # Compute GAE and returns
        advantages, returns = self.compute_gae(values.cpu().numpy(), rewards.cpu().numpy(), dones.cpu().numpy())

        # Compute task weights
        task_weights = self.calculate_task_weights()

        # Automatic mixed-precision training
        total_policy_loss = 0
        total_value_loss = 0
        total_entropy = 0

        for _ in range(MultiTaskPPOConfig.ppo_epochs):
            # Shuffle the data
            perm = torch.randperm(len(batch))
            for i in range(0, len(batch), MultiTaskPPOConfig.batch_size):
                idx = perm[i:i + MultiTaskPPOConfig.batch_size]

                # Slice a mini-batch
                batch_states = states[idx]
                batch_actions = actions[idx]
                batch_old_log_probs = old_log_probs[idx]
                batch_returns = returns[idx]
                batch_advantages = advantages[idx]
                batch_task_ids = task_ids[idx]

                with autocast(device_type=MultiTaskPPOConfig.device.type,
                              enabled=MultiTaskPPOConfig.device.type == 'cuda'):
                    # Forward pass
                    action_means, action_stds, new_values = self.policy(batch_states, batch_task_ids)
                    dist = Normal(action_means, action_stds)
                    new_log_probs = dist.log_prob(batch_actions).sum(-1, keepdim=True)
                    entropy = dist.entropy().mean()

                    # Importance sampling ratio
                    ratio = (new_log_probs - batch_old_log_probs).exp()

                    # Policy loss
                    surr1 = ratio * batch_advantages.unsqueeze(-1)
                    surr2 = torch.clamp(ratio, 1 - MultiTaskPPOConfig.clip_epsilon,
                                        1 + MultiTaskPPOConfig.clip_epsilon) * batch_advantages.unsqueeze(-1)
                    policy_loss_per_task = -torch.min(surr1, surr2)

                    # Apply task weights
                    selected_weights = task_weights[batch_task_ids].unsqueeze(-1)
                    policy_loss = (policy_loss_per_task * selected_weights).mean()
                    policy_loss -= MultiTaskPPOConfig.entropy_coef * entropy

                    # Value loss (clipped)
                    value_pred_clipped = values[idx] + (new_values - values[idx]).clamp(
                        -MultiTaskPPOConfig.clip_epsilon,
                        MultiTaskPPOConfig.clip_epsilon
                    )
                    value_loss1 = (new_values.squeeze() - batch_returns).pow(2)
                    value_loss2 = (value_pred_clipped.squeeze() - batch_returns).pow(2)
                    value_loss = 0.5 * torch.max(value_loss1, value_loss2).mean()

                    # Total loss
                    loss = policy_loss + value_loss

                # Backward pass
                self.scaler.scale(loss).backward()

                total_policy_loss += policy_loss.item()
                total_value_loss += value_loss.item()
                total_entropy += entropy.item()

            # Gradient clipping and parameter update
            self.scaler.unscale_(self.optimizer)
            torch.nn.utils.clip_grad_norm_(self.policy.shared_net.parameters(), 1.0)
            torch.nn.utils.clip_grad_norm_(
                list(self.policy.task_heads.parameters()) +
                list(self.policy.value_heads.parameters()),
                0.5
            )
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.optimizer.zero_grad()

        self.scheduler.step()

        # Decay action noise
        self.policy.decay_action_std()

        return (total_policy_loss / MultiTaskPPOConfig.ppo_epochs,
                total_value_loss / MultiTaskPPOConfig.ppo_epochs,
                total_entropy / MultiTaskPPOConfig.ppo_epochs)

    def evaluate_policy(self):
        """Evaluate current policy performance."""
        eval_results = {i: {'rewards': [], 'successes': []} for i in range(MultiTaskPPOConfig.num_tasks)}

        for task_id in range(MultiTaskPPOConfig.num_tasks):
            env = self.envs[task_id]
            for _ in range(MultiTaskPPOConfig.eval_episodes):
                state, _ = env.reset()
                episode_reward = 0
                done = False
                success = False
                for _ in range(MultiTaskPPOConfig.max_steps):
                    with torch.no_grad():
                        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(MultiTaskPPOConfig.device)
                        task_id_tensor = torch.tensor([task_id], dtype=torch.long, device=MultiTaskPPOConfig.device)
                        action_mean, _, _ = self.policy(state_tensor, task_id_tensor)
                        action = action_mean.squeeze(0).cpu().numpy()
                    state, reward, done, trunc, info = env.step(action)
                    episode_reward += reward
                    success = success or info.get('success', False)
                    if done or trunc:
                        break
                eval_results[task_id]['rewards'].append(episode_reward)
                eval_results[task_id]['successes'].append(success)

        # Record evaluation results
        for task_id in range(MultiTaskPPOConfig.num_tasks):
            avg_reward = np.mean(eval_results[task_id]['rewards'])
            success_rate = np.mean(eval_results[task_id]['successes'])
            self.eval_rewards[task_id].append(avg_reward)
            self.eval_success[task_id].append(success_rate)

        return eval_results

    def train(self):
        print(f"Starting training on device: {MultiTaskPPOConfig.device}")
        print(f"Curriculum schedule: {MultiTaskPPOConfig.curriculum_schedule}")
        start_time = time.time()

        # Initial evaluation
        self.evaluate_policy()

        for episode in range(MultiTaskPPOConfig.max_episodes):
            # Experience collection phase
            self.collect_experience(MultiTaskPPOConfig.max_steps, episode)

            # Policy optimization phase
            policy_loss, value_loss, entropy = self.update_policy()

            # MetaController update
            self.update_meta_controller()

            # Record statistics
            for exp in self.buffer:
                task_id = exp['task_id']
                self.episode_rewards[task_id].append(exp['reward'])
                self.episode_lengths[task_id].append(1)

            # Periodic logging
            if (episode + 1) % MultiTaskPPOConfig.log_interval == 0:
                avg_rewards = {k: np.mean(v) if v else 0 for k, v in self.episode_rewards.items()}
                success_rates = {
                    k: np.mean([1 if r > 0 else 0 for r in v]) if v else 0
                    for k, v in self.episode_rewards.items()
                }
                time_cost = time.time() - start_time

                # Print the current curriculum phase
                current_task_names = MultiTaskPPOConfig.curriculum_schedule[self.phase_thresholds[self.current_phase]]
                print(f"\nEpisode {episode + 1:5d} | Time: {time_cost:6.1f}s")
                print(f"Current curriculum phase: {current_task_names} (Phase {self.current_phase})")
                print(f"Action std: {self.policy.current_std:.3f} | Learning rate: {self.scheduler.get_last_lr()[0]:.2e}")
                for task_id in range(MultiTaskPPOConfig.num_tasks):
                    task_name = MultiTaskPPOConfig.task_names[task_id]
                    print(f"  {task_name:25s} | Avg Reward: {avg_rewards[task_id]:7.2f} | Success Rate: {success_rates[task_id]:.2f}")
                print(f"  Policy Loss: {policy_loss:.4f} | Value Loss: {value_loss:.4f} | Entropy: {entropy:.4f}")
                start_time = time.time()

            # Periodic evaluation
            if (episode + 1) % MultiTaskPPOConfig.eval_interval == 0:
                eval_results = self.evaluate_policy()
                if (episode + 1) % 1000 == 0:
                    print("\nEvaluation results:")
                    for task_id in range(MultiTaskPPOConfig.num_tasks):
                        task_name = MultiTaskPPOConfig.task_names[task_id]
                        avg_reward = np.mean(eval_results[task_id]['rewards'])
                        success_rate = np.mean(eval_results[task_id]['successes'])
                        print(f"  {task_name:25s} | Avg Reward: {avg_reward:7.2f} | Success Rate: {success_rate:.2f}")

        # Save the model after training
        torch.save({
            'policy_state_dict': self.policy.state_dict(),
            'meta_controller_state_dict': self.meta_controller.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict()
        }, "multitask_ppo_model.pth")


if __name__ == "__main__":
    trainer = EnhancedMultiTaskPPOTrainer()
    print(f"State dim: {trainer.state_dim}, Action dim: {trainer.action_dim}")
    trainer.train()

Partial training output:

Episode    50 | Time:  216.6s
Current curriculum phase: ['reach-v2-goal-observable'] (Phase 0)
Action std: 1.427 | Learning rate: 5.00e-05
  reach-v2-goal-observable  | Avg Reward:    1.42 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.00 | Success Rate: 0.00
  pick-place-v2-goal-observable | Avg Reward:    0.00 | Success Rate: 0.00
  Policy Loss: -0.1777 | Value Loss: 471.4303 | Entropy: 1.7773

Episode   100 | Time:  193.3s
Current curriculum phase: ['reach-v2-goal-observable'] (Phase 0)
Action std: 1.357 | Learning rate: 5.00e-05
  reach-v2-goal-observable  | Avg Reward:    1.42 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.00 | Success Rate: 0.00
  pick-place-v2-goal-observable | Avg Reward:    0.00 | Success Rate: 0.00
  Policy Loss: -0.1729 | Value Loss: 357.7264 | Entropy: 1.7293

......

Episode  2800 | Time:  198.6s
Current curriculum phase: ['reach-v2-goal-observable', 'push-v2-goal-observable'] (Phase 1)
Action std: 0.200 | Learning rate: 4.11e-05
  reach-v2-goal-observable  | Avg Reward:    1.44 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.00 | Success Rate: 0.00
  Policy Loss: 0.0092 | Value Loss: 191.3147 | Entropy: -0.0918

Episode  2850 | Time:  212.2s
Current curriculum phase: ['reach-v2-goal-observable', 'push-v2-goal-observable'] (Phase 1)
Action std: 0.200 | Learning rate: 4.08e-05
  reach-v2-goal-observable  | Avg Reward:    1.44 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.00 | Success Rate: 0.00
  Policy Loss: 0.0090 | Value Loss: 183.6324 | Entropy: -0.0902

Episode  2900 | Time:  210.4s
Current curriculum phase: ['reach-v2-goal-observable', 'push-v2-goal-observable'] (Phase 1)
Action std: 0.200 | Learning rate: 4.05e-05
  reach-v2-goal-observable  | Avg Reward:    1.44 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.00 | Success Rate: 0.00
  Policy Loss: 0.0089 | Value Loss: 188.5185 | Entropy: -0.0889

Episode  2950 | Time:  210.1s
Current curriculum phase: ['reach-v2-goal-observable', 'push-v2-goal-observable'] (Phase 1)
Action std: 0.200 | Learning rate: 4.02e-05
  reach-v2-goal-observable  | Avg Reward:    1.44 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.00 | Success Rate: 0.00
  Policy Loss: 0.0087 | Value Loss: 183.0386 | Entropy: -0.0874

Episode  3000 | Time:  212.0s
Current curriculum phase: ['reach-v2-goal-observable', 'push-v2-goal-observable'] (Phase 1)
Action std: 0.200 | Learning rate: 3.99e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.00 | Success Rate: 0.00
  Policy Loss: 0.0086 | Value Loss: 182.9761 | Entropy: -0.0858

Evaluation results:
  reach-v2-goal-observable  | Avg Reward:  106.66 | Success Rate: 0.00
  push-v2-goal-observable   | Avg Reward:    3.99 | Success Rate: 0.00
  pick-place-v2-goal-observable | Avg Reward:    4.49 | Success Rate: 0.00

Episode  3050 | Time:  234.3s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.96e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0084 | Value Loss: 28.1028 | Entropy: -0.0843

Episode  3100 | Time:  210.3s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.93e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0083 | Value Loss: 0.1660 | Entropy: -0.0829

Episode  3150 | Time:  209.8s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.90e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0082 | Value Loss: 0.1506 | Entropy: -0.0818

Episode  3200 | Time:  210.2s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.86e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0080 | Value Loss: 0.1429 | Entropy: -0.0801

Episode  3250 | Time:  210.3s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.83e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0079 | Value Loss: 0.1725 | Entropy: -0.0785

Episode  3300 | Time:  209.7s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.80e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0077 | Value Loss: 0.1990 | Entropy: -0.0771

Episode  3350 | Time:  209.5s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.76e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0076 | Value Loss: 0.2084 | Entropy: -0.0758

Episode  3400 | Time:  210.1s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.73e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0075 | Value Loss: 0.2057 | Entropy: -0.0745

Episode  3450 | Time:  210.9s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.70e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0073 | Value Loss: 0.2251 | Entropy: -0.0733

Episode  3500 | Time:  210.1s
Current curriculum phase: ['push-v2-goal-observable', 'pick-place-v2-goal-observable'] (Phase 2)
Action std: 0.200 | Learning rate: 3.66e-05
  reach-v2-goal-observable  | Avg Reward:    1.45 | Success Rate: 1.00
  push-v2-goal-observable   | Avg Reward:    0.05 | Success Rate: 1.00
  pick-place-v2-goal-observable | Avg Reward:    0.02 | Success Rate: 1.00
  Policy Loss: 0.0072 | Value Loss: 0.2199 | Entropy: -0.0723

......
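The train() method above ends by saving the policy, meta-controller, and optimizer states to multitask_ppo_model.pth. Below is a minimal sketch, not part of the original script, of how that checkpoint could be reloaded for a quick greedy rollout. It assumes the class definitions above (MultiTaskPPOConfig, SharedPolicy, MetaController) and the metaworld import are in scope, and it rebuilds a single environment only to recover the observation and action dimensions.

# Minimal checkpoint-loading sketch (assumes the classes defined above are importable)
eval_env = ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[MultiTaskPPOConfig.task_names[0]]()
obs, _ = eval_env.reset()
state_dim, action_dim = obs.shape[0], eval_env.action_space.shape[0]

# Recreate the networks with the same architecture and load the saved weights
policy = SharedPolicy(state_dim, action_dim).to(MultiTaskPPOConfig.device)
meta_controller = MetaController(MultiTaskPPOConfig.num_tasks, state_dim).to(MultiTaskPPOConfig.device)

checkpoint = torch.load("multitask_ppo_model.pth", map_location=MultiTaskPPOConfig.device)
policy.load_state_dict(checkpoint['policy_state_dict'])
meta_controller.load_state_dict(checkpoint['meta_controller_state_dict'])
policy.eval()
meta_controller.eval()

# Greedy rollout on task 0 using the deterministic action mean, mirroring evaluate_policy()
state, _ = eval_env.reset()
for _ in range(MultiTaskPPOConfig.max_steps):
    with torch.no_grad():
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(MultiTaskPPOConfig.device)
        task_id_tensor = torch.tensor([0], dtype=torch.long, device=MultiTaskPPOConfig.device)
        action_mean, _, _ = policy(state_tensor, task_id_tensor)
    state, reward, done, trunc, info = eval_env.step(action_mean.squeeze(0).cpu().numpy())
    if done or trunc:
        break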
