Model creation and training for basic layers.
lynxkite-graph-analytics/src/lynxkite_graph_analytics/pytorch_model_ops.py
CHANGED
@@ -1,9 +1,11 @@
 """Boxes for defining PyTorch models."""
 
+import graphlib
 from lynxkite.core import ops, workspace
 from lynxkite.core.ops import Parameter as P
 import torch
 import torch_geometric as pyg
+from dataclasses import dataclass
 
 ENV = "PyTorch model"
 
@@ -70,7 +72,7 @@ reg(
 reg(
     "Activation",
     inputs=["x"],
-    params=[P.options("type", ["ReLU", "
+    params=[P.options("type", ["ReLU", "Leaky ReLU", "Tanh", "Mish"])],
 )
 reg("Concatenate", inputs=["a", "b"], outputs=["x"])
 reg("Add", inputs=["a", "b"], outputs=["x"])
@@ -105,7 +107,10 @@ ops.register_passive_op(
     "Repeat",
     inputs=[ops.Input(name="input", position="top", type="tensor")],
     outputs=[ops.Output(name="output", position="bottom", type="tensor")],
-    params=[
+    params=[
+        ops.Parameter.basic("times", 1, int),
+        ops.Parameter.basic("same_weights", True, bool),
+    ],
 )
 
 ops.register_passive_op(
@@ -117,24 +122,133 @@ ops.register_passive_op(
 )
 
 
+def _to_id(s: str) -> str:
+    """Replaces all non-alphanumeric characters with underscores."""
+    return "".join(c if c.isalnum() else "_" for c in s)
+
+
+@dataclass
+class ModelConfig:
+    model: torch.nn.Module
+    model_inputs: list[str]
+    model_outputs: list[str]
+    loss_inputs: list[str]
+    loss: torch.nn.Module
+    optimizer: torch.optim.Optimizer
+
+    def _forward(self, inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+        model_inputs = [inputs[i] for i in self.model_inputs]
+        output = self.model(*model_inputs)
+        if not isinstance(output, tuple):
+            output = (output,)
+        values = {k: v for k, v in zip(self.model_outputs, output)}
+        return values
+
+    def inference(self, inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+        # TODO: Do multiple batches.
+        self.model.eval()
+        return self._forward(inputs)
+
+    def train(self, inputs: dict[str, torch.Tensor]) -> float:
+        """Train the model for one epoch. Returns the loss."""
+        # TODO: Do multiple batches.
+        self.model.train()
+        self.optimizer.zero_grad()
+        values = self._forward(inputs)
+        values.update(inputs)
+        loss_inputs = [values[i] for i in self.loss_inputs]
+        loss = self.loss(*loss_inputs)
+        loss.backward()
+        self.optimizer.step()
+        return loss.item()
+
+
-def
+def build_model(
+    ws: workspace.Workspace, inputs: dict[str, torch.Tensor]
+) -> ModelConfig:
     """Builds the model described in the workspace."""
     optimizers = []
+    nodes = {}
     for node in ws.nodes:
-
-
+        nodes[node.id] = node
+        if node.data.title == "Optimizer":
+            optimizers.append(node.id)
     assert optimizers, "No optimizer found."
     assert len(optimizers) == 1, f"More than one optimizer found: {optimizers}"
     [optimizer] = optimizers
-
+    dependencies = {n.id: [] for n in ws.nodes}
+    edges = {}
+    # TODO: Dissolve repeat boxes here.
     for e in ws.edges:
-
-
-
+        dependencies[e.target].append(e.source)
+        edges.setdefault((e.target, e.targetHandle), []).append(
+            (e.source, e.sourceHandle)
+        )
     sizes = {}
-    for k,
-    sizes[k] =
+    for k, i in inputs.items():
+        sizes[k] = i.shape[-1]
-
-    layers
-
-
+    ts = graphlib.TopologicalSorter(dependencies)
+    layers = []
+    loss_layers = []
+    in_loss = set()
+    cfg = {}
+    loss_inputs = set()
+    used_inputs = set()
+    for node_id in ts.static_order():
+        node = nodes[node_id]
+        t = node.data.title
+        p = node.data.params
+        for b in dependencies[node_id]:
+            if b in in_loss:
+                in_loss.add(node_id)
+        ls = loss_layers if node_id in in_loss else layers
+        nid = _to_id(node_id)
+        match t:
+            case "Linear":
+                [(ib, ih)] = edges[node_id, "x"]
+                i = _to_id(ib) + "_" + ih
+                used_inputs.add(i)
+                isize = sizes[i]
+                osize = isize if p["output_dim"] == "same" else int(p["output_dim"])
+                ls.append((torch.nn.Linear(isize, osize), f"{i} -> {nid}_x"))
+                sizes[f"{nid}_x"] = osize
+            case "Activation":
+                [(ib, ih)] = edges[node_id, "x"]
+                i = _to_id(ib) + "_" + ih
+                used_inputs.add(i)
+                f = getattr(torch.nn.functional, p["type"].lower().replace(" ", "_"))
+                ls.append((f, f"{i} -> {nid}_x"))
+                sizes[f"{nid}_x"] = sizes[i]
+            case "MSE loss":
+                [(xb, xh)] = edges[node_id, "x"]
+                xi = _to_id(xb) + "_" + xh
+                [(yb, yh)] = edges[node_id, "y"]
+                yi = _to_id(yb) + "_" + yh
+                loss_inputs.add(xi)
+                loss_inputs.add(yi)
+                in_loss.add(node_id)
+                loss_layers.append(
+                    (torch.nn.functional.mse_loss, f"{xi}, {yi} -> {nid}_loss")
+                )
+    cfg["model_inputs"] = used_inputs & inputs.keys()
+    cfg["model_outputs"] = loss_inputs - inputs.keys()
+    cfg["loss_inputs"] = loss_inputs
+    # Make sure the trained output is output from the last model layer.
+    outputs = ", ".join(cfg["model_outputs"])
+    layers.append((torch.nn.Identity(), f"{outputs} -> {outputs}"))
+    # Create model.
+    cfg["model"] = pyg.nn.Sequential(", ".join(used_inputs & inputs.keys()), layers)
+    # Make sure the loss is output from the last loss layer.
+    [(lossb, lossh)] = edges[optimizer, "loss"]
+    lossi = _to_id(lossb) + "_" + lossh
+    loss_layers.append((torch.nn.Identity(), f"{lossi} -> loss"))
+    # Create loss function.
+    cfg["loss"] = pyg.nn.Sequential(", ".join(loss_inputs), loss_layers)
+    assert not list(cfg["loss"].parameters()), (
+        f"loss should have no parameters: {list(cfg['loss'].parameters())}"
+    )
+    # Create optimizer.
+    p = nodes[optimizer].data.params
+    o = getattr(torch.optim, p["type"])
+    cfg["optimizer"] = o(cfg["model"].parameters(), lr=p["lr"])
+    return ModelConfig(**cfg)
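Note: build_model assembles both the model and the loss with pyg.nn.Sequential, which compiles a list of (callable, "inputs -> outputs") pairs into a module. Named values flow between the entries, and plain functions such as torch.nn.functional.mse_loss can appear alongside torch.nn.Module instances. A minimal standalone sketch of that mechanism (the emb_x/lin_x/act_x names are illustrative, mirroring the test below):

import torch
import torch_geometric as pyg

# Each entry is (callable, "ins -> outs"); outputs become inputs for later entries.
model = pyg.nn.Sequential(
    "emb_x",
    [
        (torch.nn.Linear(4, 4), "emb_x -> lin_x"),
        (torch.nn.functional.leaky_relu, "lin_x -> act_x"),
        (torch.nn.Identity(), "act_x -> act_x"),  # pins act_x as the final output
    ],
)
print(model(torch.rand(100, 4)).shape)  # torch.Size([100, 4])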
lynxkite-graph-analytics/tests/test_pytorch_model_ops.py
ADDED
@@ -0,0 +1,65 @@
+from lynxkite.core import workspace
+from lynxkite_graph_analytics import pytorch_model_ops
+import torch
+import pytest
+
+
+def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str]]):
+    ws = workspace.Workspace(env=env)
+    for id, data in nodes.items():
+        ws.nodes.append(
+            workspace.WorkspaceNode(
+                id=id,
+                type="basic",
+                data=workspace.WorkspaceNodeData(title=data["title"], params=data),
+                position=workspace.Position(
+                    x=data.get("x", 0),
+                    y=data.get("y", 0),
+                ),
+            )
+        )
+    ws.edges = [
+        workspace.WorkspaceEdge(
+            id=f"{source}->{target}",
+            source=source.split(":")[0],
+            target=target.split(":")[0],
+            sourceHandle=source.split(":")[1],
+            targetHandle=target.split(":")[1],
+        )
+        for source, target in edges
+    ]
+    return ws
+
+
+async def test_build_model():
+    ws = make_ws(
+        pytorch_model_ops.ENV,
+        {
+            "emb": {"title": "Input: embedding"},
+            "lin": {"title": "Linear", "output_dim": "same"},
+            "act": {"title": "Activation", "type": "Leaky ReLU"},
+            "label": {"title": "Input: label"},
+            "loss": {"title": "MSE loss"},
+            "optim": {"title": "Optimizer", "type": "SGD", "lr": 0.1},
+        },
+        [
+            ("emb:x", "lin:x"),
+            ("lin:x", "act:x"),
+            ("act:x", "loss:x"),
+            ("label:y", "loss:y"),
+            ("loss:loss", "optim:loss"),
+        ],
+    )
+    x = torch.rand(100, 4)
+    y = x + 1
+    m = pytorch_model_ops.build_model(ws, {"emb_x": x, "label_y": y})
+    for i in range(1000):
+        loss = m.train({"emb_x": x, "label_y": y})
+    assert loss < 0.1
+    o = m.inference({"emb_x": x[:1]})
+    error = torch.nn.functional.mse_loss(o["act_x"], x[:1] + 1)
+    assert error < 0.1
+
+
+if __name__ == "__main__":
+    pytest.main()
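For reference, the traversal order in build_model comes from the standard-library graphlib: the dependencies dict maps each box to its upstream sources, and TopologicalSorter.static_order() yields sources before the boxes that consume them, which is why sizes is always populated before a layer needs it. A small sketch in the same orientation (node ids are illustrative, matching the test workspace):

import graphlib

# node -> predecessors, as in build_model's dependencies dict
deps = {"lin": ["emb"], "act": ["lin"], "loss": ["act", "label"]}
print(list(graphlib.TopologicalSorter(deps).static_order()))
# One valid order: ['emb', 'label', 'lin', 'act', 'loss']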