import math
from typing import Any

import candle
from candle import Tensor

from .module import Module

# See https://github.com/pytorch/pytorch/blob/main/torch/nn/modules/linear.py
class Identity(Module):
    r"""A placeholder identity operator that is argument-insensitive.

    Args:
        args: any argument (unused)
        kwargs: any keyword argument (unused)

    Shape:
        - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
        - Output: :math:`(*)`, same shape as the input.

    Examples::

        >>> m = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)
        >>> input = candle.randn(128, 20)
        >>> output = m(input)
        >>> print(output.shape)
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__()

    def forward(self, input: Tensor) -> Tensor:
        return input


class Linear(Module):
    r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        bias: If set to ``False``, the layer will not learn an additive bias.
            Default: ``True``

    Shape:
        - Input: :math:`(*, H_{in})` where :math:`*` means any number of
          dimensions including none and :math:`H_{in} = \text{in\_features}`.
        - Output: :math:`(*, H_{out})` where all but the last dimension
          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`(\text{out\_features}, \text{in\_features})`. The values are
            meant to be initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
            :math:`k = \frac{1}{\text{in\_features}}`; the current implementation
            initializes them to ones (see the TODO in ``__init__``).
        bias: the learnable bias of the module of shape :math:`(\text{out\_features})`.
            If :attr:`bias` is ``True``, the values are meant to be initialized from
            :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
            :math:`k = \frac{1}{\text{in\_features}}`; the current implementation
            initializes it to zeros.
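
    Example (a small sketch mirroring the ``Identity`` example above; it assumes
    ``candle.randn`` and the ``nn`` namespace are available as used there)::

        >>> m = nn.Linear(20, 30)
        >>> input = candle.randn(128, 20)
        >>> output = m(input)
        >>> print(output.shape)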
"""
__constants__ = ["in_features", "out_features"]
in_features: int
out_features: int
weight: Tensor

    def __init__(
        self,
        in_features: int,
        out_features: int,
        bias: bool = True,
        device=None,
        dtype=None,
    ) -> None:
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__()
        # Allow 'weight' to be swapped out for a quantized QTensor (see forward below).
        self._quantizable_buffers.add("weight")
        self.in_features = in_features
        self.out_features = out_features
        # TODO: Do actual initialization here: e.g. kaiming_uniform or xavier_uniform
        self.weight = candle.ones((out_features, in_features), **factory_kwargs)
        if bias:
            self.bias = candle.zeros((out_features,), **factory_kwargs)
        else:
            self.bias = None

    def forward(self, x: Tensor) -> Tensor:
        dims = x.shape
        last_dim = dims[-1]

        if isinstance(self.weight, candle.QTensor):
            # Quantized path: 'matmul_t' multiplies the input by the transposed
            # quantized weight.
            if len(dims) < 3:
                matmul_result = self.weight.matmul_t(x)
            elif len(dims) == 3:
                # Flatten the batch dimension, multiply, then restore the batched shape.
                b, n, m = dims
                output_shape = (b, n, self.out_features)
                re = x.reshape((b * n, m))
                matmul_result = self.weight.matmul_t(re).reshape(output_shape)
            else:
                raise NotImplementedError("'QTensor.matmul_t' is not implemented for more than 3 dimensions")

            if self.bias is not None:
                return matmul_result.broadcast_add(self.bias)
            return matmul_result
        else:
            # Unquantized path: transpose the weight, broadcasting it over the batch
            # dimension for inputs with more than two dimensions, then use a regular matmul.
            if self.weight.shape[-1] == last_dim and len(dims) < 3:
                w = self.weight.t()
            else:
                batch_size = dims[0]
                w = self.weight.broadcast_left((batch_size,)).t()

            x = x.matmul(w)
            if self.bias is not None:
                x = x.broadcast_add(self.bias)
            return x

    def extra_repr(self) -> str:
        return f"in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}"