First, here is the UML diagram I put together.
Then I will go through each module in turn.
I. Input data:
The input is a NestedTensor, which has two members: tensors and mask. tensors is just the input image batch; mask has the same height and width as tensors but only a single channel.
How the mask is built can be found in the nested_tensor_from_tensor_list function in util/misc.py.
Taking a whole batch as an example:
tensors: take the largest w and h over the batch and pad every image with 0 (to the right and bottom) up to that size.
mask: same height and width as the padded image; the padded positions are True and every other position is False. When it is actually used it gets inverted (not_mask), so the padded area becomes 0 and the image area becomes 1, which makes sense.
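A minimal runnable sketch of this padding-and-mask construction (simplified from the idea behind nested_tensor_from_tensor_list; the real function also handles dtype/device and ONNX export, and pad_to_batch is just an illustrative name):
import torch

def pad_to_batch(images):
    """images: list of [3, H_i, W_i] tensors of different sizes.
    Returns padded tensors [B, 3, H_max, W_max] and a mask [B, H_max, W_max]."""
    max_h = max(img.shape[1] for img in images)
    max_w = max(img.shape[2] for img in images)
    b = len(images)
    tensors = torch.zeros(b, 3, max_h, max_w)               # pad with 0 on the right / bottom
    mask = torch.ones(b, max_h, max_w, dtype=torch.bool)    # True = padding
    for i, img in enumerate(images):
        h, w = img.shape[1], img.shape[2]
        tensors[i, :, :h, :w] = img
        mask[i, :h, :w] = False                             # image region -> False
    return tensors, mask

imgs = [torch.rand(3, 600, 800), torch.rand(3, 768, 1151)]
tensors, mask = pad_to_batch(imgs)
print(tensors.shape, mask.shape)  # torch.Size([2, 3, 768, 1151]) torch.Size([2, 768, 1151])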
II. Backbone:
The backbone module bundles the CNN feature extractor and the position embedding.
1) Feature map
Taking the single output feature map as an example: if the input size is [2, 3, 768, 1151], after five stride-2 downsamplings (a total stride of 32) the resulting feature map has size [2, 2048, 24, 36]. The mask is simply downsampled (with F.interpolate) to a new mask of size [2, 24, 36].
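A small sketch of that mask downsampling: the float mask is interpolated to the feature-map size and cast back to bool (shapes follow the example above):
import torch
import torch.nn.functional as F

mask = torch.zeros(2, 768, 1151, dtype=torch.bool)   # padding mask from the NestedTensor
feat = torch.rand(2, 2048, 24, 36)                   # C5 feature map from the backbone

# interpolate needs a float input; the default nearest mode keeps it a hard 0/1 mask
new_mask = F.interpolate(mask[None].float(), size=feat.shape[-2:])[0].to(torch.bool)
print(new_mask.shape)  # torch.Size([2, 24, 36])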
2) Position embedding
There are two variants: PositionEmbeddingSine and PositionEmbeddingLearned.
2.1) PositionEmbeddingSine
def forward(self, tensor_list: NestedTensor):
x = tensor_list.tensors
mask = tensor_list.mask
assert mask is not None
#invert the mask: padding regions become all 0, everything else all 1
not_mask = ~mask
#cumulative sums along the y and x directions
y_embed = not_mask.cumsum(1, dtype=torch.float32)
x_embed = not_mask.cumsum(2, dtype=torch.float32)
#normalize: index -1 is where the cumulative sum is largest; scale to 0~1, then multiply by scale (2*pi)
if self.normalize:
eps = 1e-6
y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
#128 dims, indices 0-127
dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
#integer division by 2 makes each consecutive pair share the same value; an increasing sequence from 1 up to about 8.6596e+03
dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
#broadcasting against dim_t turns the last dimension into 128
pos_x = x_embed[:, :, :, None] / dim_t
pos_y = y_embed[:, :, :, None] / dim_t
# pos_x[:, :, :, 0::2] and pos_x[:, :, :, 1::2] hold the same values
# one gets sin applied, the other cos
pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3)
pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3)
#finally concatenate everything together
pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
return pos
If the input mask is torch.Size([2, 24, 36]), the final output pos is torch.Size([2, 256, 24, 36]).
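A quick usage sketch to confirm that shape, assuming the official DETR repo layout (util/misc.py and models/position_encoding.py on the path); num_pos_feats=128 gives 128 dims for y plus 128 for x, i.e. 256 in total:
import torch
from util.misc import NestedTensor
from models.position_encoding import PositionEmbeddingSine

pos_enc = PositionEmbeddingSine(num_pos_feats=128, normalize=True)
tensors = torch.rand(2, 2048, 24, 36)
mask = torch.zeros(2, 24, 36, dtype=torch.bool)   # no padding in this toy example
pos = pos_enc(NestedTensor(tensors, mask))
print(pos.shape)  # torch.Size([2, 256, 24, 36])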
2.2) PositionEmbeddingLearned
As the name suggests, this one is learnable.
def forward(self, tensor_list: NestedTensor):
x = tensor_list.tensors
h, w = x.shape[-2:]
i = torch.arange(w, device=x.device)
j = torch.arange(h, device=x.device)
x_emb = self.col_embed(i)
y_emb = self.row_embed(j)
pos = torch.cat([
x_emb.unsqueeze(0).repeat(h, 1, 1),
y_emb.unsqueeze(1).repeat(1, w, 1),
], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(x.shape[0], 1, 1, 1)
return pos
self.row_embed and self.col_embed are both nn.Embedding layers, i.e. learnable.
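For reference, the corresponding __init__ looks roughly like this (a sketch consistent with the forward above; 50 is the assumed maximum feature-map side length, and num_pos_feats=128 so the concatenated x/y embedding matches the 256-d model width):
from torch import nn

class PositionEmbeddingLearned(nn.Module):
    """Learned absolute position embedding (sketch of the __init__ behind the forward above)."""
    def __init__(self, num_pos_feats=128):
        super().__init__()
        # one learnable vector per row / column index, up to a 50x50 feature map
        self.row_embed = nn.Embedding(50, num_pos_feats)
        self.col_embed = nn.Embedding(50, num_pos_feats)
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.uniform_(self.row_embed.weight)
        nn.init.uniform_(self.col_embed.weight)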
III. Transformer:
First look at the members defined in __init__:
def __init__(self, d_model=512, nhead=8, num_encoder_layers=6,
num_decoder_layers=6, dim_feedforward=2048, dropout=0.1,
activation="relu", normalize_before=False,
return_intermediate_dec=False):
super().__init__()
encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward,
dropout, activation, normalize_before)
encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)
decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward,
dropout, activation, normalize_before)
decoder_norm = nn.LayerNorm(d_model)
self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm,
return_intermediate=return_intermediate_dec)
self._reset_parameters()
self.d_model = d_model
self.nhead = nhead
The most important members are the encoder and the decoder, which hold stacks of TransformerEncoderLayer and TransformerDecoderLayer modules respectively (cloned into an nn.ModuleList). You can think of each as several layers chained in series, with no weight sharing.
Now look at forward:
def forward(self, src, mask, query_embed, pos_embed):
# flatten NxCxHxW to HWxNxC
bs, c, h, w = src.shape
src = src.flatten(2).permute(2, 0, 1)
pos_embed = pos_embed.flatten(2).permute(2, 0, 1)
query_embed = query_embed.unsqueeze(1).repeat(1, bs, 1)
mask = mask.flatten(1)
tgt = torch.zeros_like(query_embed)
memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed)
hs = self.decoder(tgt, memory, memory_key_padding_mask=mask,
pos=pos_embed, query_pos=query_embed)
return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w)
And here is how it is called from DETR:
self.transformer(self.input_proj(src), mask, self.query_embed.weight, pos[-1])[0]
Here self.input_proj is a 1x1 conv that compresses the 2048 channels above down to 256. src and pos are both reshaped to [L, N, E], where L is the sequence length (the feature map's w*h), N is the batch size, and E is the embedding size (256 here).
self.query_embed.weight is just a learnable weight:
self.query_embed = nn.Embedding(num_queries, hidden_dim)
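A small sketch of the projection and flattening described above (numbers follow the running example; input_proj here is a locally constructed 1x1 conv just for illustration):
import torch
from torch import nn

hidden_dim = 256
input_proj = nn.Conv2d(2048, hidden_dim, kernel_size=1)   # 1x1 conv: 2048 channels -> 256

src = torch.rand(2, 2048, 24, 36)
proj = input_proj(src)                                    # [N, 256, H, W]
seq = proj.flatten(2).permute(2, 0, 1)                    # [L, N, E] with L = H*W = 864
print(seq.shape)  # torch.Size([864, 2, 256])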
1. A closer look at TransformerEncoderLayer
Again, start with the members in __init__:
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
activation="relu", normalize_before=False):
super().__init__()
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
# Implementation of Feedforward model
self.linear1 = nn.Linear(d_model, dim_feedforward)
self.dropout = nn.Dropout(dropout)
self.linear2 = nn.Linear(dim_feedforward, d_model)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
self.dropout1 = nn.Dropout(dropout)
self.dropout2 = nn.Dropout(dropout)
self.activation = _get_activation_fn(activation)
self.normalize_before = normalize_before
All of these are fairly self-explanatory.
Then look at forward, which actually dispatches to forward_post or forward_pre; the only difference is whether the LayerNorm is applied before or after the attention / feed-forward blocks. Let's walk through forward_post:
def forward_post(self,
src,
src_mask: Optional[Tensor] = None,
src_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None):
#add src and pos together; this is self-attention, so query and key are the same
q = k = self.with_pos_embed(src, pos)
#key_padding_mask is needed because sequence lengths differ within a batch: it zeroes the
#attention weights on the padded positions; this is exactly the mask from the NestedTensor.
#attn_mask is the mask used for masked attention, where future positions are unknown at
#prediction time and must be masked out; DETR has no such issue, so it is always None.
src2 = self.self_attn(q, k, value=src, attn_mask=src_mask,
key_padding_mask=src_key_padding_mask)[0]
src = src + self.dropout1(src2)
src = self.norm1(src)
src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
src = src + self.dropout2(src2)
src = self.norm2(src)
return src
Here k, q, v all come from src. key and query have the position embedding added, but value does not.
TransformerEncoder simply calls TransformerEncoderLayer 6 times; except for the first layer, whose src comes from the backbone, every layer takes the previous layer's output as its src (sketched below).
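A simplified sketch of that stacking loop (the real TransformerEncoder deep-copies the layer num_layers times so the weights are not shared, and may apply a final LayerNorm; both details are kept minimal here):
import copy
from torch import nn

def clone_layers(layer, n):
    # each clone gets its own weights, precisely because of the deepcopy
    return nn.ModuleList(copy.deepcopy(layer) for _ in range(n))

def encoder_forward(layers, src, src_key_padding_mask=None, pos=None):
    output = src                          # the first layer consumes the projected backbone features
    for layer in layers:                  # every later layer consumes the previous layer's output
        output = layer(output, src_key_padding_mask=src_key_padding_mask, pos=pos)
    return output                         # this is the "memory" fed to every decoder layer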
2. A closer look at TransformerDecoderLayer
Again starting from __init__:
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
activation="relu", normalize_before=False):
super().__init__()
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
# Implementation of Feedforward model
self.linear1 = nn.Linear(d_model, dim_feedforward)
self.dropout = nn.Dropout(dropout)
self.linear2 = nn.Linear(dim_feedforward, d_model)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
self.norm3 = nn.LayerNorm(d_model)
self.dropout1 = nn.Dropout(dropout)
self.dropout2 = nn.Dropout(dropout)
self.dropout3 = nn.Dropout(dropout)
self.activation = _get_activation_fn(activation)
self.normalize_before = normalize_before
It uses both self-attention and encoder-decoder attention.
As before, we only analyze forward_post:
def forward_post(self, tgt, memory,
tgt_mask: Optional[Tensor] = None,
memory_mask: Optional[Tensor] = None,
tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None):
# tgt + query_pos gives the query and key
q = k = self.with_pos_embed(tgt, query_pos)
# self attention
tgt2 = self.self_attn(q, k, value=tgt, attn_mask=tgt_mask,
key_padding_mask=tgt_key_padding_mask)[0]
tgt = tgt + self.dropout1(tgt2)
tgt = self.norm1(tgt)
#encoder-decoder attention
#query is tgt + query_pos, where tgt partly comes from the self-attention above
#key is memory + pos
#value is just memory
tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt, query_pos),
key=self.with_pos_embed(memory, pos),
value=memory, attn_mask=memory_mask,
key_padding_mask=memory_key_padding_mask)[0]
tgt = tgt + self.dropout2(tgt2)
tgt = self.norm2(tgt)
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
tgt = tgt + self.dropout3(tgt2)
tgt = self.norm3(tgt)
return tgt
The input tgt is all zeros with shape [100, batch, 256], representing the 100 object queries; memory is what the encoder produces; pos is the same position embedding as in the encoder; query_pos is the learnable query embedding, with the same shape as tgt (an nn.Embedding).
Now look at DETR's forward:
def forward(self, samples: NestedTensor):
""" The forward expects a NestedTensor, which consists of:
- samples.tensor: batched images, of shape [batch_size x 3 x H x W]
- samples.mask: a binary mask of shape [batch_size x H x W], containing 1 on padded pixels
It returns a dict with the following elements:
- "pred_logits": the classification logits (including no-object) for all queries.
Shape= [batch_size x num_queries x (num_classes + 1)]
- "pred_boxes": The normalized boxes coordinates for all queries, represented as
(center_x, center_y, height, width). These values are normalized in [0, 1],
relative to the size of each individual image (disregarding possible padding).
See PostProcess for information on how to retrieve the unnormalized bounding box.
- "aux_outputs": Optional, only returned when auxilary losses are activated. It is a list of
dictionnaries containing the two above keys for each decoder layer.
"""
if isinstance(samples, (list, torch.Tensor)):
samples = nested_tensor_from_tensor_list(samples)
features, pos = self.backbone(samples)
src, mask = features[-1].decompose()
assert mask is not None
hs = self.transformer(self.input_proj(src), mask, self.query_embed.weight, pos[-1])[0]
outputs_class = self.class_embed(hs)
outputs_coord = self.bbox_embed(hs).sigmoid()
out = {'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1]}
if self.aux_loss:
out['aux_outputs'] = self._set_aux_loss(outputs_class, outputs_coord)
return out
self.class_embed is an nn.Linear; self.bbox_embed is an MLP.
Finally the transformer output is converted into class and coordinate predictions. Since the decoder has 6 layers and every layer's output is kept, hs has shape [6, batch, 100, 256].
The result of the last decoder layer goes into out['pred_logits'] / out['pred_boxes'], and the remaining five layers go into out['aux_outputs'].
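A sketch of how the remaining layers could be packed into aux_outputs, mirroring the idea of _set_aux_loss (outputs_class and outputs_coord are the per-layer stacks of shape [6, batch, 100, ...]):
def set_aux_loss(outputs_class, outputs_coord):
    # one dict per decoder layer except the last, which already sits in pred_logits / pred_boxes
    return [{'pred_logits': a, 'pred_boxes': b}
            for a, b in zip(outputs_class[:-1], outputs_coord[:-1])]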
IV. SetCriterion: used to compute the loss
Look at the SetCriterion class.
It can compute several kinds of loss:
def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
def loss_cardinality(self, outputs, targets, indices, num_boxes):
def loss_boxes(self, outputs, targets, indices, num_boxes):
def loss_masks(self, outputs, targets, indices, num_boxes):
These losses are computed in much the same way as in conventional detectors.
The crucial part is how the Hungarian matching algorithm assigns the 100 queries to the ground-truth boxes: queries that are not matched become negative samples and are classified directly as the no-object (background) class.
In SetCriterion's forward there is one key line,
indices = self.matcher(outputs_without_aux, targets)
which finds the correct matching (a simplified sketch of how these indices are then consumed is shown below).
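A simplified sketch of how those matched indices feed the classification loss, following the idea of loss_labels (classification_loss is just an illustrative name, and the empty_weight that down-weights the no-object class is omitted):
import torch
import torch.nn.functional as F

def classification_loss(pred_logits, targets, indices, num_classes):
    # pred_logits: [batch, num_queries, num_classes + 1]; class index num_classes = "no object"
    batch_idx = torch.cat([torch.full_like(src, b) for b, (src, _) in enumerate(indices)])
    src_idx = torch.cat([src for (src, _) in indices])
    matched_labels = torch.cat([t["labels"][tgt] for t, (_, tgt) in zip(targets, indices)])

    # every query defaults to "no object"; matched queries get their ground-truth class
    target_classes = torch.full(pred_logits.shape[:2], num_classes,
                                dtype=torch.int64, device=pred_logits.device)
    target_classes[batch_idx, src_idx] = matched_labels
    return F.cross_entropy(pred_logits.transpose(1, 2), target_classes)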
Now let's look at HungarianMatcher.
First its __init__:
def __init__(self, cost_class: float = 1, cost_bbox: float = 1, cost_giou: float = 1):
"""Creates the matcher
Params:
cost_class: This is the relative weight of the classification error in the matching cost
cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost
cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost
"""
super().__init__()
self.cost_class = cost_class
self.cost_bbox = cost_bbox
self.cost_giou = cost_giou
assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, "all costs cant be 0"
This specifies the weight of each cost term; the weighted sum of these costs is what the Hungarian matching algorithm operates on.
Next, forward:
@torch.no_grad()
def forward(self, outputs, targets):
""" Performs the matching
Params:
outputs: This is a dict that contains at least these entries:
"pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
"pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
"labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
objects in the target) containing the class labels
"boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
Returns:
A list of size batch_size, containing tuples of (index_i, index_j) where:
- index_i is the indices of the selected predictions (in order)
- index_j is the indices of the corresponding selected targets (in order)
For each batch element, it holds:
len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
"""
bs, num_queries = outputs["pred_logits"].shape[:2]
# We flatten to compute the cost matrices in a batch
out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes]
out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4]
# Also concat the target labels and boxes
tgt_ids = torch.cat([v["labels"] for v in targets])
tgt_bbox = torch.cat([v["boxes"] for v in targets])
# Compute the classification cost. Contrary to the loss, we don't use the NLL,
# but approximate it in 1 - proba[target class].
# The 1 is a constant that doesn't change the matching, it can be ommitted.
cost_class = -out_prob[:, tgt_ids]
# Compute the L1 cost between boxes
cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
# Compute the giou cost betwen boxes
cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox))
# Final cost matrix
C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
C = C.view(bs, num_queries, -1).cpu()
sizes = [len(v["boxes"]) for v in targets]
indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
The classification cost, the L1 cost between boxes, and the giou cost are computed separately and combined with their weights to get C. C is then split per image and fed into scipy.optimize.linear_sum_assignment (note the @torch.no_grad(): the matching step does not participate in gradient computation).
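A tiny standalone example of what linear_sum_assignment does with such a cost matrix (3 queries vs 2 ground-truth boxes; lower cost = better match; the numbers are made up):
import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[0.9, 0.1],
                 [0.4, 0.8],
                 [0.2, 0.3]])
row_ind, col_ind = linear_sum_assignment(cost)
print(row_ind, col_ind)  # [0 2] [1 0]: query 0 -> GT 1, query 2 -> GT 0, query 1 stays unmatched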
Note that if DETR's output contains 'aux_outputs', the loss is computed in the same way for each of those decoder layers as well.
At inference time only the last layer's output is used; among the 100 queries, simply pick out the ones classified as foreground and use them directly.
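A simplified inference sketch along those lines (the 0.7 threshold is an arbitrary illustrative choice, and the rescaling of boxes back to absolute image coordinates that PostProcess performs is omitted):
import torch

@torch.no_grad()
def pick_foreground(pred_logits, pred_boxes, threshold=0.7):
    # pred_logits: [batch, 100, num_classes + 1], last class = "no object"; pred_boxes: [batch, 100, 4]
    probs = pred_logits.softmax(-1)[..., :-1]        # drop the no-object column
    scores, labels = probs.max(-1)
    keep = scores > threshold                        # queries confidently classified as foreground
    return [(pred_boxes[b][keep[b]], labels[b][keep[b]], scores[b][keep[b]])
            for b in range(pred_logits.shape[0])]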