Upload 80 files

This view is limited to 50 files because it contains too many changes.
- deepsvg/__init__.py +0 -0
- deepsvg/__pycache__/__init__.cpython-310.pyc +0 -0
- deepsvg/config.py +101 -0
- deepsvg/difflib/__pycache__/tensor.cpython-310.pyc +0 -0
- deepsvg/difflib/loss.py +51 -0
- deepsvg/difflib/tensor.py +249 -0
- deepsvg/difflib/utils.py +81 -0
- deepsvg/gui/README.md +2 -0
- deepsvg/gui/__init__.py +0 -0
- deepsvg/gui/config.py +5 -0
- deepsvg/gui/deepsvg.kv +380 -0
- deepsvg/gui/interpolate.py +126 -0
- deepsvg/gui/layout/__init__.py +0 -0
- deepsvg/gui/layout/aligned_textinput.py +52 -0
- deepsvg/gui/main.py +794 -0
- deepsvg/gui/res/down.png +0 -0
- deepsvg/gui/res/hand.png +0 -0
- deepsvg/gui/res/hand.svg +1 -0
- deepsvg/gui/res/pause.png +0 -0
- deepsvg/gui/res/pen.png +0 -0
- deepsvg/gui/res/pen.svg +1 -0
- deepsvg/gui/res/pencil.png +0 -0
- deepsvg/gui/res/pencil.svg +1 -0
- deepsvg/gui/res/play.png +0 -0
- deepsvg/gui/res/play.svg +3 -0
- deepsvg/gui/res/switch.png +0 -0
- deepsvg/gui/res/up.png +0 -0
- deepsvg/gui/state/__init__.py +0 -0
- deepsvg/gui/state/project.py +115 -0
- deepsvg/gui/state/state.py +78 -0
- deepsvg/gui/utils.py +66 -0
- deepsvg/model/basic_blocks.py +101 -0
- deepsvg/model/config.py +107 -0
- deepsvg/model/layers/__init__.py +0 -0
- deepsvg/model/layers/attention.py +161 -0
- deepsvg/model/layers/functional.py +256 -0
- deepsvg/model/layers/improved_transformer.py +141 -0
- deepsvg/model/layers/positional_encoding.py +43 -0
- deepsvg/model/layers/transformer.py +393 -0
- deepsvg/model/layers/utils.py +36 -0
- deepsvg/model/loss.py +104 -0
- deepsvg/model/model.py +690 -0
- deepsvg/model/utils.py +84 -0
- deepsvg/model/vector_quantize_pytorch.py +605 -0
- deepsvg/schedulers/warmup.py +67 -0
- deepsvg/svg_dataset.py +269 -0
- deepsvg/svglib/__init__.py +0 -0
- deepsvg/svglib/__pycache__/__init__.cpython-310.pyc +0 -0
- deepsvg/svglib/__pycache__/geom.cpython-310.pyc +0 -0
- deepsvg/svglib/__pycache__/svg.cpython-310.pyc +0 -0
deepsvg/__init__.py
ADDED
File without changes
deepsvg/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (139 Bytes)
deepsvg/config.py
ADDED
@@ -0,0 +1,101 @@
import torch.optim as optim
from deepsvg.schedulers.warmup import GradualWarmupScheduler


class _Config:
    """
    Training config.
    """
    def __init__(self, num_gpus=1):

        self.num_gpus = num_gpus                              # number of GPUs to train on

        self.dataloader_module = "deepsvg.svgtensor_dataset"  # module providing the dataset
        self.collate_fn = None                                # optional custom batch collation
        self.data_dir = "./dataset/icons_tensor/"             # directory of preprocessed tensors
        self.meta_filepath = "./dataset/icons_meta.csv"       # CSV with per-icon metadata
        self.loader_num_workers = 0                           # DataLoader worker processes

        self.pretrained_path = None                           # checkpoint to initialize from

        self.model_cfg = None                                 # model hyperparameter config

        self.num_epochs = None                                # stop after this many epochs...
        self.num_steps = None                                 # ...or after this many steps
        self.learning_rate = 1e-3
        self.batch_size = 100
        self.warmup_steps = 500                               # LR warmup duration

        # Dataset
        self.train_ratio = 1.0                                # fraction of data used for training
        self.nb_augmentations = 1                             # augmented copies per sample

        self.max_num_groups = 15                              # max number of paths per SVG
        self.max_seq_len = 30                                 # max commands per path
        self.max_total_len = None                             # max total commands per SVG

        self.filter_uni = None
        self.filter_category = None
        self.filter_platform = None

        self.filter_labels = None

        self.grad_clip = None                                 # max gradient norm, if set

        self.log_every = 20                                   # steps between log lines
        self.val_every = 1000                                 # steps between validations
        self.ckpt_every = 1000                                # steps between checkpoints

        self.stats_to_print = {
            "train": ["lr", "time"]
        }

        self.model_args = []                                  # batch keys fed to the model
        self.optimizer_starts = [0]                           # step at which each optimizer kicks in

    # Overridable methods
    def make_model(self):
        raise NotImplementedError

    def make_losses(self):
        raise NotImplementedError

    def make_optimizers(self, model):
        return [optim.AdamW(model.parameters(), self.learning_rate)]

    def make_schedulers(self, optimizers, epoch_size):
        return [None] * len(optimizers)

    def make_warmup_schedulers(self, optimizers, scheduler_lrs):
        return [GradualWarmupScheduler(optimizer, multiplier=1.0, total_epoch=self.warmup_steps, after_scheduler=scheduler_lr)
                for optimizer, scheduler_lr in zip(optimizers, scheduler_lrs)]

    def get_params(self, step, epoch):
        return {}

    def get_weights(self, step, epoch):
        return {}

    def set_train_vars(self, train_vars, dataloader):
        pass

    def visualize(self, model, output, train_vars, step, epoch, summary_writer, visualization_dir):
        pass

    # Utility methods
    def values(self):
        for key in dir(self):
            if not key.startswith("__") and not callable(getattr(self, key)):
                yield key, getattr(self, key)

    def to_dict(self):
        return {key: val for key, val in self.values()}

    def load_dict(self, dict):
        for key, val in dict.items():
            setattr(self, key, val)

    def print_params(self):
        for key, val in self.values():
            print(f"  {key} = {val}")
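Since `_Config` leaves `make_model` and `make_losses` abstract, a concrete training config subclasses it and supplies both. A minimal usage sketch (not part of this commit; `DummyModel` and the `MSELoss` placeholder are hypothetical stand-ins for a real DeepSVG model and loss):

# --- usage sketch (not part of this commit) ---
import torch.nn as nn
from deepsvg.config import _Config

class DummyModel(nn.Module):  # hypothetical stand-in for a real model
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(16, 16)

    def forward(self, x, params=None):
        return self.linear(x)

class Config(_Config):
    def make_model(self):
        return DummyModel()

    def make_losses(self):
        # a real config returns DeepSVG loss modules; MSELoss is a placeholder
        return [nn.MSELoss()]

cfg = Config()
cfg.print_params()  # dumps every non-callable attribute, e.g. "batch_size = 100"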
deepsvg/difflib/__pycache__/tensor.cpython-310.pyc
ADDED
Binary file (8.57 kB)
deepsvg/difflib/loss.py
ADDED
@@ -0,0 +1,51 @@
import numpy as np
from .utils import *


def chamfer_loss(x, y):
    d = torch.cdist(x, y)
    return d.min(dim=0).values.mean() + d.min(dim=1).values.mean()


def continuity_loss(x):
    d = (x[1:] - x[:-1]).norm(dim=-1, p=2)
    return d.mean()


def svg_length_loss(p_pred, p_target):
    pred_length, target_length = get_length(p_pred), get_length(p_target)

    return (target_length - pred_length).abs() / target_length


def svg_emd_loss(p_pred, p_target,
                 first_point_weight=False, return_matched_indices=False):
    n, m = len(p_pred), len(p_target)

    if n == 0:
        return 0.

    # Make target point list clockwise
    p_target = make_clockwise(p_target)

    # Compute length distribution
    distr_pred = torch.linspace(0., 1., n).to(p_pred.device)
    distr_target = get_length_distribution(p_target, normalize=True)
    d = torch.cdist(distr_pred.unsqueeze(-1), distr_target.unsqueeze(-1))
    matching = d.argmin(dim=-1)
    p_target_sub = p_target[matching]

    # EMD
    i = np.argmin([torch.norm(p_pred - reorder(p_target_sub, i), dim=-1).mean() for i in range(n)])

    losses = torch.norm(p_pred - reorder(p_target_sub, i), dim=-1)

    if first_point_weight:
        weights = torch.ones_like(losses)
        weights[0] = 10.
        losses = losses * weights

    if return_matched_indices:
        return losses.mean(), (p_pred, p_target, reorder(matching, i))

    return losses.mean()
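These losses operate on (N, 2) point tensors such as those produced by SVGTensor.sample_points. A quick sketch (not part of this commit; the shapes are assumptions read off the code above):

# --- usage sketch (not part of this commit) ---
import torch
from deepsvg.difflib.loss import chamfer_loss, svg_emd_loss

p_pred = torch.rand(20, 2)    # 20 predicted contour points
p_target = torch.rand(30, 2)  # 30 target contour points

print(chamfer_loss(p_pred, p_target))  # symmetric nearest-neighbour distance
print(svg_emd_loss(p_pred, p_target))  # cyclic-order-aware matching loss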
deepsvg/difflib/tensor.py
ADDED
@@ -0,0 +1,249 @@
from __future__ import annotations
import torch
import torch.utils.data
from typing import Union
Num = Union[int, float]


class SVGTensor:
    #                       0    1    2    3    4      5      6
    COMMANDS_SIMPLIFIED = ["m", "l", "c", "a", "EOS", "SOS", "z"]

    #                            radius  x-axis  large-arc  sweep  ctrl1  ctrl2  end-pos
    #                            (2)     rot     flag       flag   (2)    (2)    (2)
    CMD_ARGS_MASK = torch.tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],   # m
                                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],   # l
                                  [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],   # c
                                  [1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1],   # a
                                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],   # EOS
                                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],   # SOS
                                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])  # z

    class Index:
        COMMAND = 0
        RADIUS = slice(1, 3)
        X_AXIS_ROT = 3
        LARGE_ARC_FLG = 4
        SWEEP_FLG = 5
        START_POS = slice(6, 8)
        CONTROL1 = slice(8, 10)
        CONTROL2 = slice(10, 12)
        END_POS = slice(12, 14)

    class IndexArgs:
        RADIUS = slice(0, 2)
        X_AXIS_ROT = 2
        LARGE_ARC_FLG = 3
        SWEEP_FLG = 4
        CONTROL1 = slice(5, 7)
        CONTROL2 = slice(7, 9)
        END_POS = slice(9, 11)

    position_keys = ["control1", "control2", "end_pos"]
    all_position_keys = ["start_pos", *position_keys]
    arg_keys = ["radius", "x_axis_rot", "large_arc_flg", "sweep_flg", *position_keys]
    all_arg_keys = [*arg_keys[:4], "start_pos", *arg_keys[4:]]
    cmd_arg_keys = ["commands", *arg_keys]
    all_keys = ["commands", *all_arg_keys]

    def __init__(self, commands, radius, x_axis_rot, large_arc_flg, sweep_flg, control1, control2, end_pos,
                 seq_len=None, label=None, PAD_VAL=-1, ARGS_DIM=256, filling=0):

        self.commands = commands.reshape(-1, 1).float()

        self.radius = radius.float()
        self.x_axis_rot = x_axis_rot.reshape(-1, 1).float()
        self.large_arc_flg = large_arc_flg.reshape(-1, 1).float()
        self.sweep_flg = sweep_flg.reshape(-1, 1).float()

        self.control1 = control1.float()
        self.control2 = control2.float()
        self.end_pos = end_pos.float()

        self.seq_len = torch.tensor(len(commands)) if seq_len is None else seq_len
        self.label = label

        self.PAD_VAL = PAD_VAL
        self.ARGS_DIM = ARGS_DIM

        self.sos_token = torch.Tensor([self.COMMANDS_SIMPLIFIED.index("SOS")]).unsqueeze(-1)
        self.eos_token = self.pad_token = torch.Tensor([self.COMMANDS_SIMPLIFIED.index("EOS")]).unsqueeze(-1)

        self.filling = filling

    @property
    def start_pos(self):
        start_pos = self.end_pos[:-1]

        return torch.cat([
            start_pos.new_zeros(1, 2),
            start_pos
        ])

    @staticmethod
    def from_data(data, *args, **kwargs):
        return SVGTensor(data[:, SVGTensor.Index.COMMAND], data[:, SVGTensor.Index.RADIUS], data[:, SVGTensor.Index.X_AXIS_ROT],
                         data[:, SVGTensor.Index.LARGE_ARC_FLG], data[:, SVGTensor.Index.SWEEP_FLG], data[:, SVGTensor.Index.CONTROL1],
                         data[:, SVGTensor.Index.CONTROL2], data[:, SVGTensor.Index.END_POS], *args, **kwargs)

    @staticmethod
    def from_cmd_args(commands, args, *nargs, **kwargs):
        return SVGTensor(commands, args[:, SVGTensor.IndexArgs.RADIUS], args[:, SVGTensor.IndexArgs.X_AXIS_ROT],
                         args[:, SVGTensor.IndexArgs.LARGE_ARC_FLG], args[:, SVGTensor.IndexArgs.SWEEP_FLG], args[:, SVGTensor.IndexArgs.CONTROL1],
                         args[:, SVGTensor.IndexArgs.CONTROL2], args[:, SVGTensor.IndexArgs.END_POS], *nargs, **kwargs)

    def get_data(self, keys):
        return torch.cat([self.__getattribute__(key) for key in keys], dim=-1)

    @property
    def data(self):
        return self.get_data(self.all_keys)

    def copy(self):
        return SVGTensor(*[self.__getattribute__(key).clone() for key in self.cmd_arg_keys],
                         seq_len=self.seq_len.clone(), label=self.label, PAD_VAL=self.PAD_VAL, ARGS_DIM=self.ARGS_DIM,
                         filling=self.filling)

    def add_sos(self):
        self.commands = torch.cat([self.sos_token, self.commands])

        for key in self.arg_keys:
            v = self.__getattribute__(key)
            self.__setattr__(key, torch.cat([v.new_full((1, v.size(-1)), self.PAD_VAL), v]))

        self.seq_len += 1
        return self

    def drop_sos(self):
        for key in self.cmd_arg_keys:
            self.__setattr__(key, self.__getattribute__(key)[1:])

        self.seq_len -= 1
        return self

    def add_eos(self):
        self.commands = torch.cat([self.commands, self.eos_token])

        for key in self.arg_keys:
            v = self.__getattribute__(key)
            self.__setattr__(key, torch.cat([v, v.new_full((1, v.size(-1)), self.PAD_VAL)]))

        return self

    def pad(self, seq_len=51):
        pad_len = max(seq_len - len(self.commands), 0)

        self.commands = torch.cat([self.commands, self.pad_token.repeat(pad_len, 1)])

        for key in self.arg_keys:
            v = self.__getattribute__(key)
            self.__setattr__(key, torch.cat([v, v.new_full((pad_len, v.size(-1)), self.PAD_VAL)]))

        return self

    def unpad(self):
        # Remove EOS + padding
        for key in self.cmd_arg_keys:
            self.__setattr__(key, self.__getattribute__(key)[:self.seq_len])
        return self

    def draw(self, *args, **kwargs):
        from deepsvg.svglib.svg import SVGPath
        return SVGPath.from_tensor(self.data).draw(*args, **kwargs)

    def cmds(self):
        return self.commands.reshape(-1)

    def args(self, with_start_pos=False):
        if with_start_pos:
            return self.get_data(self.all_arg_keys)

        return self.get_data(self.arg_keys)

    def _get_real_commands_mask(self):
        mask = self.cmds() < self.COMMANDS_SIMPLIFIED.index("EOS")
        return mask

    def _get_args_mask(self):
        mask = SVGTensor.CMD_ARGS_MASK[self.cmds().long()].bool()
        return mask

    def get_relative_args(self):
        data = self.args().clone()

        real_commands = self._get_real_commands_mask()
        data_real_commands = data[real_commands]

        start_pos = data_real_commands[:-1, SVGTensor.IndexArgs.END_POS].clone()

        data_real_commands[1:, SVGTensor.IndexArgs.CONTROL1] -= start_pos
        data_real_commands[1:, SVGTensor.IndexArgs.CONTROL2] -= start_pos
        data_real_commands[1:, SVGTensor.IndexArgs.END_POS] -= start_pos
        data[real_commands] = data_real_commands

        mask = self._get_args_mask()
        data[mask] += self.ARGS_DIM - 1
        data[~mask] = self.PAD_VAL

        return data

    def sample_points(self, n=10):
        device = self.commands.device

        z = torch.linspace(0, 1, n, device=device)
        Z = torch.stack([torch.ones_like(z), z, z.pow(2), z.pow(3)], dim=1)

        Q = torch.tensor([
            [[0., 0., 0., 0.],  # "m"
             [0., 0., 0., 0.],
             [0., 0., 0., 0.],
             [0., 0., 0., 0.]],

            [[1., 0., 0., 0.],  # "l"
             [-1, 0., 0., 1.],
             [0., 0., 0., 0.],
             [0., 0., 0., 0.]],

            [[1., 0., 0., 0.],  # "c"
             [-3, 3., 0., 0.],
             [3., -6, 3., 0.],
             [-1, 3., -3, 1.]],

            torch.zeros(4, 4),  # "a", no support yet

            torch.zeros(4, 4),  # "EOS"
            torch.zeros(4, 4),  # "SOS"
            torch.zeros(4, 4),  # "z"
        ], device=device)

        commands, pos = self.commands.reshape(-1).long(), self.get_data(self.all_position_keys).reshape(-1, 4, 2)
        inds = (commands == self.COMMANDS_SIMPLIFIED.index("l")) | (commands == self.COMMANDS_SIMPLIFIED.index("c"))
        commands, pos = commands[inds], pos[inds]

        Z_coeffs = torch.matmul(Q[commands], pos)

        # The last point of each command is the first point of the next, so drop
        # duplicated points, keeping only the last command's endpoint
        sample_points = torch.matmul(Z, Z_coeffs)
        sample_points = torch.cat([sample_points[:, :-1].reshape(-1, 2), sample_points[-1, -1].unsqueeze(0)])

        return sample_points

    @staticmethod
    def get_length_distribution(p, normalize=True):
        start, end = p[:-1], p[1:]
        length_distr = torch.norm(end - start, dim=-1).cumsum(dim=0)
        length_distr = torch.cat([length_distr.new_zeros(1), length_distr])
        if normalize:
            length_distr = length_distr / length_distr[-1]
        return length_distr

    def sample_uniform_points(self, n=100):
        p = self.sample_points(n=n)

        distr_unif = torch.linspace(0., 1., n).to(p.device)
        distr = self.get_length_distribution(p, normalize=True)
        d = torch.cdist(distr_unif.unsqueeze(-1), distr.unsqueeze(-1))
        matching = d.argmin(dim=-1)

        return p[matching]
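SVGTensor expects one 14-value row per command, laid out according to the Index class above. A small round-trip sketch (not part of this commit; the values are made up for illustration):

# --- usage sketch (not part of this commit) ---
import torch
from deepsvg.difflib.tensor import SVGTensor

# Row layout per Index: [cmd, radius(2), x_axis_rot, large_arc_flg, sweep_flg,
#                        start_pos(2), control1(2), control2(2), end_pos(2)]
data = torch.tensor([
    [0] + [0.] * 11 + [10., 10.],                               # m: move to (10, 10)
    [1] + [0.] * 11 + [60., 10.],                               # l: line to (60, 10)
    [2] + [0.] * 5 + [60., 10., 70., 30., 80., 30., 90., 10.],  # c: cubic Bézier
])

svg_t = SVGTensor.from_data(data)
print(svg_t.cmds())             # tensor([0., 1., 2.])

svg_t.add_eos().pad(seq_len=8)  # append EOS, pad commands/args to length 8
print(svg_t.commands.shape)     # torch.Size([8, 1])

rel = svg_t.get_relative_args() # positions made relative, shifted by ARGS_DIM - 1
print(rel.shape)                # torch.Size([8, 11])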
deepsvg/difflib/utils.py
ADDED
@@ -0,0 +1,81 @@
import torch
import matplotlib.pyplot as plt
import PIL.Image
import io


def set_viewbox(viewbox):
    plt.xlim(0, viewbox[0])
    plt.ylim(viewbox[1], 0)


def plot_points(p, viewbox=None, show_color=False, show_colorbar=False, image_file=None, return_img=False):
    cm = plt.cm.get_cmap('RdYlBu')
    plt.gca().set_aspect('equal')
    plt.gca().invert_yaxis()
    plt.gca().axis('off')

    if viewbox is not None:
        set_viewbox(viewbox)

    kwargs = {"c": range(len(p)), "cmap": cm} if show_color else {}
    plt.scatter(p[:, 0], p[:, 1], **kwargs)

    if show_color and show_colorbar:
        plt.colorbar()

    if image_file is not None:
        plt.savefig(image_file, bbox_inches='tight')

    if return_img:
        buf = io.BytesIO()
        plt.gcf().savefig(buf)
        buf.seek(0)
        return PIL.Image.open(buf)


def plot_matching(p1, p2, matching, viewbox=None):
    plt.gca().set_aspect('equal')
    plt.gca().invert_yaxis()
    plt.axis("off")

    if viewbox is not None:
        set_viewbox(viewbox)

    plt.scatter(p1[:, 0], p1[:, 1], color="C0")
    plt.scatter(p2[:, 0], p2[:, 1], color="C1")

    for start, end in zip(p1[::10], p2[matching][::10]):
        plt.plot([start[0], end[0]], [start[1], end[1]], color="C2")


def is_clockwise(p):
    start, end = p[:-1], p[1:]
    return torch.stack([start, end], dim=-1).det().sum() > 0


def make_clockwise(p):
    if not is_clockwise(p):
        return p.flip(dims=[0])
    return p


def reorder(p, i):
    return torch.cat([p[i:], p[:i]])


def get_length(p):
    start, end = p[:-1], p[1:]
    return torch.norm(end - start, dim=-1).sum()


def get_length_distribution(p, normalize=True):
    start, end = p[:-1], p[1:]
    length_distr = torch.norm(end - start, dim=-1).cumsum(dim=0)
    length_distr = torch.cat([length_distr.new_zeros(1),
                              length_distr])

    if normalize:
        length_distr = length_distr / length_distr[-1]

    return length_distr
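The geometric helpers all take an (N, 2) tensor of polyline points. A quick sketch of the orientation and arc-length functions (not part of this commit; note that is_clockwise treats positive signed area as "clockwise", consistent with a y-down image coordinate system):

# --- usage sketch (not part of this commit) ---
import torch
from deepsvg.difflib.utils import is_clockwise, make_clockwise, get_length, get_length_distribution

# A closed unit square
square = torch.tensor([[0., 0.], [1., 0.], [1., 1.], [0., 1.], [0., 0.]])

print(is_clockwise(square))          # tensor(True): positive signed area, y-down convention
path = make_clockwise(square)        # already "clockwise", returned unchanged
print(get_length(path))              # tensor(4.) -- perimeter of the square
print(get_length_distribution(path)) # tensor([0.00, 0.25, 0.50, 0.75, 1.00])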
deepsvg/gui/README.md
ADDED
@@ -0,0 +1,2 @@
# DeepSVG Editor: a GUI for easy SVG animation
deepsvg/gui/__init__.py
ADDED
File without changes
deepsvg/gui/config.py
ADDED
@@ -0,0 +1,5 @@
import os

ROOT_DIR = "./gui_data"
STATE_PATH = os.path.join(ROOT_DIR, "state.pkl")
TMP_PATH = os.path.join(ROOT_DIR, "tmp")
deepsvg/gui/deepsvg.kv
ADDED
@@ -0,0 +1,380 @@
<DeepSVGWidget>:
    orientation: "vertical"

    Header:
        id: header

    BoxLayout:
        orientation: "horizontal"

        Sidebar:
            id: sidebar_scroll

        StencilView:
            size_hint: 1, 1

            canvas.before:
                Color:
                    rgb: 0.89, 0.89, 0.89
                Rectangle:
                    pos: self.pos
                    size: self.size

            EditorView:
                id: editor

    TimeLine:
        id: timeline_scroll


<Header>:
    orientation: "horizontal"
    size_hint_y: None
    height: 50

    canvas.before:
        Color:
            rgb: 0, 0, 0
        Rectangle:
            pos: self.pos
            size: self.size
    HeaderIcon:
        source: "deepsvg/gui/res/hand.png"
        index: 0

    HeaderIcon:
        source: "deepsvg/gui/res/pen.png"
        index: 1

    HeaderIcon:
        source: "deepsvg/gui/res/pencil.png"
        index: 2

    Padding

    HeaderButton:
        text: "Clear all"
        on_press: root.on_erase()

    Padding

    HeaderButton:
        text: "Done"
        on_press: root.on_done()

    Label

    Padding

    TitleWidget:
        text: root.title
        on_text: root.on_title(self.text)

    Padding

    Label

    HeaderButton:
        text: "Add frame"
        on_press: root.add_frame()

    Padding

    HeaderButton:
        text: "Interpolate"
        on_press: root.interpolate()

    Padding

    HeaderIcon:
        index: 3
        source: "deepsvg/gui/res/pause.png" if root.is_playing else "deepsvg/gui/res/play.png"
        on_press: root.pause_animation() if root.is_playing else root.play_animation()


<TitleWidget>:
    size_hint_x: None
    width: 150

    multiline: False
    background_color: 0, 0, 0, 1
    background_active: ""
    background_normal: ""
    halign: "center"
    valign: "middle"
    foreground_color: 1, 1, 1, 1
    hint_text_color: 1, 1, 1, 1
    cursor_color: 1, 1, 1, 1

<Sidebar>:
    do_scroll_x: False
    size_hint_x: None
    width: 225

    canvas.before:
        Color:
            rgb: 1, 1, 1
        Rectangle:
            pos: self.pos
            size: self.size

        Color:
            rgb: 0.8, 0.8, 0.8
        Line:
            width: 1
            rectangle: self.x, self.y, self.width, self.height

    BoxLayout:
        id: sidebar

        orientation: "vertical"
        size_hint_y: None
        height: self.children[0].height * len(self.children) if self.children else 0


<PathLayerView>
    orientation: "horizontal"
    size_hint_y: None
    height: 40

    canvas:
        Color:
            rgb: (0.08, 0.58, 0.97) if self.parent is not None and self.index == self.parent.parent.selected_path_idx else (1, 1, 1)
        Rectangle:
            pos: self.pos
            size: self.size
        Color:
            rgb: 0.8, 0.8, 0.8
        Line:
            width: 1
            rectangle: self.x, self.y, self.width, self.height

    Label:
        color: 0, 0, 0, 1
        size_hint_x: None
        text: str(root.index)
        width: self.texture_size[0]
        padding_x: 10

    Label

    Image:
        size_hint_x: None
        source: root.source
        nocache: True

    Label

    UpButton

    DownButton

    Padding

    ReverseButton

    Label


<UpButton>:
    size_hint: None, None
    height: 0.6 * self.parent.height
    pos_hint: {'top': 0.8}
    width: self.height

    background_normal: ""
    background_down: ""
    background_color: 0.3, 0.3, 0.3, 1

    Image:
        source: "deepsvg/gui/res/up.png"
        center: self.parent.center

<DownButton>:
    size_hint: None, None
    height: 0.6 * self.parent.height
    pos_hint: {'top': 0.8}
    width: self.height

    background_normal: ""
    background_down: ""
    background_color: 0.3, 0.3, 0.3, 1

    Image:
        source: "deepsvg/gui/res/down.png"
        center: self.parent.center

<ReverseButton>:
    size_hint: None, None
    height: 0.6 * self.parent.height
    pos_hint: {'top': 0.8}
    width: self.height

    background_normal: ""
    background_down: ""
    background_color: 0.3, 0.3, 0.3, 1

    Image:
        source: "deepsvg/gui/res/switch.png"
        center: self.parent.center


<BezierSegment>:
    canvas:
        Color:
            rgb: .769, .769, .769
        Line:
            points: [*self.p1, *self.q1] if root.parent and root.parent.selected and self.is_curved else []
            dash_length: 5
            dash_offset: 5
        Line:
            points: [*self.q2, *self.p2] if root.parent and root.parent.selected and self.is_curved else []
            dash_length: 5
            dash_offset: 5
        Color:
            rgb: tuple(root.parent.color) if root.parent is not None else (.043, .769, 1)
        Line:
            bezier: ([*self.p1, *self.q1, *self.q2, *self.p2] if self.is_curved else [*self.p1, *self.p2]) if self.is_finished else [-10000, -10000]
            width: 1.1
        Color:
            rgb: 1, .616, .043
        Point:
            points: [*self.p1, *self.p2] if root.parent and root.parent.selected else []
            pointsize: 1.5
        Color:
            rgb: .769, .769, .769
        Point:
            points: [*self.q1, *self.q2] if self.is_curved and root.parent and root.parent.selected else []
            pointsize: 1.5


<BezierPath>


<Sketch>:
    canvas:
        Color:
            rgb: root.color
        Line:
            points: root.points
            width: 1.2


<EditorView>:
    size_hint: None, None
    size: draw_viewbox.size
    center: self.parent.center
    scale: 1.5

    DrawViewbox:
        id: draw_viewbox


<DrawViewbox>
    size: 256, 256

    canvas.before:
        Color:
            rgb: 1, 1, 1
        Rectangle:
            pos: self.pos
            size: self.size

<TimeLine>:
    do_scroll_y: False
    size_hint_y: None
    height: 50

    canvas.before:
        Color:
            rgb: 1, 1, 1
        Rectangle:
            pos: self.pos
            size: self.size

        Color:
            rgb: 0.8, 0.8, 0.8
        Line:
            width: 1
            rectangle: self.x, self.y, self.width, self.height

    BoxLayout:
        id: timeline

        orientation: "horizontal"
        size_hint_x: None
        width: 50 * len(self.children) if self.children else 0


<FrameView>
    size_hint_x: None
    width: self.height

    color: 0, 0, 0, 1
    text: str(self.index)

    background_normal: ""
    background_down: ""
    background_color: (0.08, 0.58, 0.97, 1) if self.parent and self.index == self.parent.parent.selected_frame and self.keyframe else (0.48, 0.78, 1, 1) if self.parent and self.index == self.parent.parent.selected_frame and not self.keyframe else (1, 0.67, 0.19, 1) if self.keyframe else (1, 1, 1, 1)

    canvas:
        Color:
            rgb: 0.8, 0.8, 0.8
        Line:
            width: 1
            rectangle: self.x, self.y, self.width, self.height


<HeaderIcon>:
    size_hint_x: None
    width: self.height

    canvas:
        Color:
            rgb: (0.08, 0.58, 0.97) if self.index == self.parent.selected_tool else (0, 0, 0)
        Rectangle:
            pos: self.pos
            size: self.size

    Image:
        source: self.parent.source
        center: self.parent.center

<HeaderButton>:
    size_hint: None, None
    height: 0.8 * self.parent.height
    pos_hint: {'top': 0.9}
    width: self.texture_size[0] + 40

<Padding>:
    size_hint_x: None
    width: 10


<FileChoosePopup>:
    title: "Import SVG file"
    size_hint: .9, .9
    auto_dismiss: True

    BoxLayout:
        orientation: "vertical"
        FileChooserIconView:
            id: filechooser
            path: root.path

        BoxLayout:
            size_hint_x: 1
            size_hint_y: None
            height: 50
            pos_hint: {'center_x': .5, 'center_y': .5}
            spacing: 20
            Button:
                text: "Cancel"
                on_release: root.dismiss()
            Button:
                text: "Load"
                on_release: root.load(filechooser.selection)
                id: ldbtn
                disabled: True if filechooser.selection==[] else False
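Kivy applies a kv rule file either automatically (an app class named DeepSVGApp picks up a deepsvg.kv placed where Kivy looks for it) or explicitly via the Builder. A minimal sketch of the explicit route (not part of this commit):

# --- usage sketch (not part of this commit) ---
from kivy.lang import Builder

# Load the widget rules manually, e.g. from a script outside the gui package;
# the path assumes the repository root is the working directory.
Builder.load_file("deepsvg/gui/deepsvg.kv")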
deepsvg/gui/interpolate.py
ADDED
@@ -0,0 +1,126 @@
import torch
from torch.utils.data import DataLoader
import torch.nn as nn

from configs.deepsvg.hierarchical_ordered import Config

from deepsvg import utils
from deepsvg.svglib.svg import SVG
from deepsvg.difflib.tensor import SVGTensor
from deepsvg.svglib.geom import Bbox
from deepsvg.svgtensor_dataset import load_dataset, SVGFinetuneDataset
from deepsvg.utils.utils import batchify

from .state.project import DeepSVGProject, Frame
from .utils import easein_easeout


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
pretrained_path = "./pretrained/hierarchical_ordered.pth.tar"

cfg = Config()
cfg.model_cfg.dropout = 0.  # for faster convergence
model = cfg.make_model().to(device)
model.eval()


dataset = load_dataset(cfg)


def decode(z):
    commands_y, args_y, _ = model.greedy_sample(z=z)
    tensor_pred = SVGTensor.from_cmd_args(commands_y[0].cpu(), args_y[0].cpu())
    svg_path_sample = SVG.from_tensor(tensor_pred.data, viewbox=Bbox(256))

    return svg_path_sample


def encode_svg(svg):
    data = dataset.get(model_args=[*cfg.model_args, "tensor_grouped"], svg=svg)
    model_args = batchify((data[key] for key in cfg.model_args), device)
    z = model(*model_args, encode_mode=True)
    return z


def interpolate_svg(svg1, svg2, n=10, ease=True):
    z1, z2 = encode_svg(svg1), encode_svg(svg2)

    alphas = torch.linspace(0., 1., n+2)[1:-1]
    if ease:
        alphas = easein_easeout(alphas)

    z_list = [(1 - a) * z1 + a * z2 for a in alphas]
    svgs = [decode(z) for z in z_list]

    return svgs


def finetune_model(project: DeepSVGProject, nb_augmentations=3500):
    keyframe_ids = [i for i, frame in enumerate(project.frames) if frame.keyframe]

    if len(keyframe_ids) < 2:
        return

    svgs = [project.frames[i].svg for i in keyframe_ids]

    utils.load_model(pretrained_path, model)
    print("Finetuning...")
    finetune_dataset = SVGFinetuneDataset(dataset, svgs, frac=1.0, nb_augmentations=nb_augmentations)
    dataloader = DataLoader(finetune_dataset, batch_size=cfg.batch_size, shuffle=True, drop_last=False,
                            num_workers=cfg.loader_num_workers, collate_fn=cfg.collate_fn)

    # Optimizer, lr & warmup schedulers
    optimizers = cfg.make_optimizers(model)
    scheduler_lrs = cfg.make_schedulers(optimizers, epoch_size=len(dataloader))
    scheduler_warmups = cfg.make_warmup_schedulers(optimizers, scheduler_lrs)

    loss_fns = [l.to(device) for l in cfg.make_losses()]

    epoch = 0
    for step, data in enumerate(dataloader):
        model.train()
        model_args = [data[arg].to(device) for arg in cfg.model_args]
        labels = data["label"].to(device) if "label" in data else None
        params_dict, weights_dict = cfg.get_params(step, epoch), cfg.get_weights(step, epoch)

        for i, (loss_fn, optimizer, scheduler_lr, scheduler_warmup, optimizer_start) in enumerate(
                zip(loss_fns, optimizers, scheduler_lrs, scheduler_warmups, cfg.optimizer_starts), 1):
            optimizer.zero_grad()

            output = model(*model_args, params=params_dict)
            loss_dict = loss_fn(output, labels, weights=weights_dict)

            loss_dict["loss"].backward()
            if cfg.grad_clip is not None:
                nn.utils.clip_grad_norm_(model.parameters(), cfg.grad_clip)

            optimizer.step()
            if scheduler_lr is not None:
                scheduler_lr.step()
            if scheduler_warmup is not None:
                scheduler_warmup.step()

        if step % 20 == 0:
            print(f"Step {step}: loss: {loss_dict['loss']}")

    print("Finetuning done.")


def compute_interpolation(project: DeepSVGProject):
    finetune_model(project)

    keyframe_ids = [i for i, frame in enumerate(project.frames) if frame.keyframe]

    if len(keyframe_ids) < 2:
        return

    model.eval()

    for i1, i2 in zip(keyframe_ids[:-1], keyframe_ids[1:]):
        frames_inbetween = i2 - i1 - 1
        if frames_inbetween == 0:
            continue

        svgs = interpolate_svg(project.frames[i1].svg, project.frames[i2].svg, n=frames_inbetween, ease=False)
        for di, svg in enumerate(svgs, 1):
            project.frames[i1 + di] = Frame(i1 + di, keyframe=False, svg=svg)
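Note that importing this module runs the module-level statements above: it builds the model, loads the pretrained weights path, and loads the dataset, so the checkpoint and dataset paths must exist. A sketch of standalone use outside the GUI (not part of this commit; "a.svg" and "b.svg" are placeholder file names, and real inputs likely need the same preprocessing as the dataset's SVGs):

# --- usage sketch (not part of this commit) ---
from deepsvg.svglib.svg import SVG
from deepsvg.gui.interpolate import interpolate_svg  # triggers model/dataset setup

svg1, svg2 = SVG.load_svg("a.svg"), SVG.load_svg("b.svg")

frames = interpolate_svg(svg1, svg2, n=8)  # 8 in-between frames, eased by default
for i, svg in enumerate(frames):
    svg.save_svg(f"frame_{i}.svg")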
deepsvg/gui/layout/__init__.py
ADDED
File without changes
deepsvg/gui/layout/aligned_textinput.py
ADDED
@@ -0,0 +1,52 @@
from kivy.uix.textinput import TextInput
from kivy.properties import StringProperty

DEFAULT_PADDING = 6


class AlignedTextInput(TextInput):
    halign = StringProperty('left')
    valign = StringProperty('top')

    def __init__(self, **kwargs):
        self.halign = kwargs.get("halign", "left")
        self.valign = kwargs.get("valign", "top")

        self.bind(on_text=self.on_text)

        super().__init__(**kwargs)

    def on_text(self, instance, value):
        self.redraw()

    def on_size(self, instance, value):
        self.redraw()

    def redraw(self):
        """
        Note: This method depends on internal variables of its TextInput
        base class (_lines_rects and _refresh_text())
        """

        self._refresh_text(self.text)

        max_size = max(self._lines_rects, key=lambda r: r.size[0]).size
        num_lines = len(self._lines_rects)

        px = [DEFAULT_PADDING, DEFAULT_PADDING]
        py = [DEFAULT_PADDING, DEFAULT_PADDING]

        if self.halign == 'center':
            d = (self.width - max_size[0]) / 2.0 - DEFAULT_PADDING
            px = [d, d]
        elif self.halign == 'right':
            px[0] = self.width - max_size[0] - DEFAULT_PADDING

        if self.valign == 'middle':
            d = (self.height - max_size[1] * num_lines) / 2.0 - DEFAULT_PADDING
            py = [d + 5, d - 5]
        elif self.valign == 'bottom':
            py[0] = self.height - max_size[1] * num_lines - DEFAULT_PADDING

        self.padding_x = px
        self.padding_y = py
deepsvg/gui/main.py
ADDED
@@ -0,0 +1,794 @@
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.button import Button
from kivy.uix.scatter import Scatter
from kivy.uix.label import Label
from kivy.uix.scrollview import ScrollView
from kivy.properties import BooleanProperty, StringProperty, NumericProperty, ListProperty, ObjectProperty
from kivy.uix.behaviors import ButtonBehavior
from kivy.vector import Vector
from kivy.metrics import dp
from kivy.clock import Clock
from kivy.uix.popup import Popup

from kivy.config import Config
Config.set('graphics', 'width', '1400')
Config.set('graphics', 'height', '800')
from kivy.core.window import Window

import os
from typing import List

from deepsvg.svglib.geom import Point
from deepsvg.svglib.svg_command import SVGCommandMove, SVGCommandLine, SVGCommandBezier
from deepsvg.svgtensor_dataset import SVGTensorDataset

from .layout.aligned_textinput import AlignedTextInput
from .state.state import State, ToolMode, DrawMode, LoopMode, PlaybackMode
from .state.project import Frame
from .config import ROOT_DIR
from .interpolate import compute_interpolation
from .utils import *


if not os.path.exists(ROOT_DIR):
    os.makedirs(ROOT_DIR)

state = State()
state.load_state()
state.load_project()


class HeaderIcon(Button):
    index = NumericProperty(0)
    source = StringProperty("")

    def on_press(self):
        state.header.selected_tool = self.index


class Header(BoxLayout):
    selected_tool = NumericProperty(0)
    title = StringProperty(state.project.name)
    is_playing = BooleanProperty(False)
    delay = NumericProperty(state.delay)

    def on_selected_tool(self, *args):
        if self.selected_tool in [ToolMode.MOVE, ToolMode.PEN, ToolMode.PENCIL] and state.header.is_playing:
            state.header.pause_animation()

    def on_done(self, *args):
        if self.selected_tool == ToolMode.PEN and state.draw_mode == DrawMode.DRAW:
            path = state.current_path

            last_segment = path.children[-1]
            path.remove_widget(last_segment)

            state.draw_viewbox.on_path_done(state.current_path)

            state.draw_mode = DrawMode.STILL
            state.current_path = None
            self.selected_tool = ToolMode.MOVE

    def on_erase(self):
        state.modified = True
        state.draw_viewbox.clear()

        state.timeline.make_keyframe(False)

    def add_frame(self, keyframe=False):
        frame_idx = state.timeline._add_frame(keyframe=keyframe)

        state.project.frames.append(Frame(frame_idx, keyframe))

        self.load_next_frame(frame_idx=frame_idx)

    def play_animation(self):
        self.is_playing = True
        state.sidebar.selected_path_idx = -1
        self.clock = Clock.schedule_once(self.load_next_frame)

    def load_next_frame(self, dt=0, frame_idx=None, *args):
        if state.timeline.nb_frames > 0:
            if frame_idx is None:
                frame_idx_tmp = state.timeline.selected_frame + state.loop_orientation

                if frame_idx_tmp < 0 or frame_idx_tmp >= state.timeline.nb_frames:
                    if state.loop_mode in [LoopMode.NORMAL, LoopMode.REVERSE]:
                        frame_idx = frame_idx_tmp % state.timeline.nb_frames
                    else:  # LoopMode.PINGPONG
                        state.loop_orientation *= -1
                        frame_idx = (state.timeline.selected_frame + state.loop_orientation) % state.timeline.nb_frames
                else:
                    frame_idx = frame_idx_tmp

            state.timeline.selected_frame = frame_idx

        if self.is_playing:
            if state.playback_mode == PlaybackMode.EASE:
                t = frame_idx / state.timeline.nb_frames
                delay = 2 * state.delay / (1 + d_easein_easeout(t))
            else:
                delay = state.delay
            self.clock = Clock.schedule_once(self.load_next_frame, delay)

    def pause_animation(self):
        self.clock.cancel()
        state.sidebar.selected_path_idx = -1
        self.is_playing = False

        state.timeline.on_selected_frame()  # re-render frame to display sidebar layers

    def on_title(self, title):
        state.project.name = title

    def interpolate(self):
        state.draw_viewbox.save_frame()

        compute_interpolation(state.project)


class PathLayerView(ButtonBehavior, BoxLayout):
    index = NumericProperty(0)
    source = StringProperty("")

    def __init__(self, index, **kwargs):
        super().__init__(**kwargs)

        self.index = index
        self.source = os.path.join(state.project.cache_dir, f"{state.timeline.selected_frame}_{index}.png")

    def on_press(self):
        state.sidebar.selected_path_idx = self.index

    def move_up(self):
        if self.index > 0:
            state.sidebar.swap_paths(self.index, self.index - 1)

    def move_down(self):
        if self.index < state.sidebar.nb_paths - 1:
            state.sidebar.swap_paths(self.index, self.index + 1)

    def reverse(self):
        state.sidebar.reverse_path(self.index)


class Sidebar(ScrollView):
    selected_path_idx = NumericProperty(-1)

    @property
    def sidebar(self):
        return self.ids.sidebar

    @property
    def nb_paths(self):
        return len(self.sidebar.children)

    def on_selected_path_idx(self, *args):
        state.draw_viewbox.unselect_all()

        if self.selected_path_idx >= 0:
            state.draw_viewbox.get_path(self.selected_path_idx).selected = True

    def _add_path(self, idx=None):
        if idx is None:
            idx = self.nb_paths
        new_pathlayer = PathLayerView(idx)
        self.sidebar.add_widget(new_pathlayer)
        return idx

    def get_path(self, path_idx):
        index = self.nb_paths - 1 - path_idx
        return self.sidebar.children[index]

    def erase(self):
        self.sidebar.clear_widgets()
        self.selected_path_idx = -1

    def swap_paths(self, idx1, idx2):
        path_layer1, path_layer2 = self.get_path(idx1), self.get_path(idx2)
        path1, path2 = state.draw_viewbox.get_path(idx1), state.draw_viewbox.get_path(idx2)

        path_layer1.index, path_layer2.index = idx2, idx1
        path1.color, path2.color = path2.color, path1.color
        path1.index, path2.index = path2.index, path1.index

        id1, id2 = self.nb_paths - 1 - idx1, self.nb_paths - 1 - idx2
        self.sidebar.children[id1], self.sidebar.children[id2] = path_layer2, path_layer1
        state.draw_viewbox.children[id1], state.draw_viewbox.children[id2] = path2, path1

        self.selected_path_idx = idx2
        state.modified = True

    def reverse_path(self, idx):
        path = state.draw_viewbox.get_path(idx)
        svg_path = path.to_svg_path().reverse()
        new_path = BezierPath.from_svg_path(svg_path, color=path.color, index=path.index, selected=path.selected)

        id = self.nb_paths - 1 - idx
        state.draw_viewbox.remove_widget(path)
        state.draw_viewbox.add_widget(new_path, index=id)

        self.selected_path_idx = idx
        state.modified = True

    def select(self, path_idx):
        if self.selected_path_idx >= 0:
            state.draw_viewbox.get_path(state.sidebar.selected_path_idx).selected = False
        self.selected_path_idx = path_idx


class BezierSegment(Widget):
    is_curved = BooleanProperty(True)

    is_finished = BooleanProperty(True)
    select_dist = NumericProperty(3)

    p1 = ListProperty([0, 0])
    q1 = ListProperty([0, 0])
    q2 = ListProperty([0, 0])
    p2 = ListProperty([0, 0])

    def clone(self):
        segment = BezierSegment()
        segment.is_curved = self.is_curved
        segment.p1 = self.p1  # shallow copy
        segment.q1 = self.q1
        segment.q2 = self.q2
        segment.p2 = self.p2
        return segment

    @staticmethod
    def line(p1, p2):
        segment = BezierSegment()
        segment.is_curved = False
        segment.p1 = segment.q1 = p1
        segment.p2 = segment.q2 = p2
        return segment

    @staticmethod
    def bezier(p1, q1, q2, p2):
        segment = BezierSegment()
        segment.is_curved = True
        segment.q1, segment.q2 = q1, q2
        segment.p1, segment.p2 = p1, p2
        return segment

    def get_point(self, key):
        return getattr(self, key)

    def on_touch_down(self, touch):
        max_dist = dp(self.select_dist)

        if not self.parent.selected:
            return super().on_touch_down(touch)

        keys_to_test = ["p1", "q1", "q2", "p2"] if self.is_curved else ["p1", "p2"]
        for key in keys_to_test:
            if dist(touch.pos, getattr(self, key)) < max_dist:
                touch.ud['selected'] = key
                touch.grab(self)

                state.modified = True

        return True

    def on_touch_move(self, touch):
        if touch.grab_current is not self:
            return super().on_touch_move(touch)

        key = touch.ud['selected']
        setattr(self, key, touch.pos)

        if state.header.selected_tool == ToolMode.PEN:
            self.is_curved = True
            self.is_finished = False
            state.draw_mode = DrawMode.HOLDING_DOWN

            setattr(self, "p2", touch.pos)

        if key in ["p1", "p2"]:
            self.parent.move(self, key, touch.pos)

    def on_touch_up(self, touch):
        if touch.grab_current is not self:
            return super().on_touch_up(touch)

        touch.ungrab(self)

        if state.header.selected_tool == ToolMode.PEN:
            self.is_finished = True
            state.draw_mode = DrawMode.DRAW


class BezierPath(Widget):
    color = ListProperty([1, 1, 1])
    index = NumericProperty(0)
    selected = BooleanProperty(False)

    def __init__(self, segments: List[BezierSegment], color=None, index=None, selected=False, **kwargs):
        super().__init__(**kwargs)

        if color is not None:
            self.color = color

        if index is not None:
            self.index = index

        self.selected = selected

        for segment in segments:
            self.add_segment(segment)

    def clone(self):
        segments = [segment.clone() for segment in self.children]
        return BezierPath(segments, self.color, self.index, self.selected)

    def add_segment(self, segment: BezierSegment):
        self.add_widget(segment, index=len(self.children))

    def move(self, segment, key, pos):
        idx = self.children.index(segment)

        if not (idx == 0 and key == "p1") and not (idx == len(self.children) - 1 and key == "p2"):
            idx2, key2 = (idx-1, "p2") if key == "p1" else (idx+1, "p1")
            setattr(self.children[idx2], key2, pos)

    def add_widget(self, widget, index=0, canvas=None):
        super().add_widget(widget, index=index, canvas=canvas)

    def remove_widget(self, widget):
        super().remove_widget(widget)

    @staticmethod
    def from_svg_path(svg_path: SVGPath, *args, **kwargs):
        segments = []
        for command in svg_path.path_commands:
            if isinstance(command, SVGCommandBezier):
                segment = BezierSegment.bezier(flip_vertical(command.p1.tolist()), flip_vertical(command.q1.tolist()),
                                               flip_vertical(command.q2.tolist()), flip_vertical(command.p2.tolist()))
                segments.append(segment)
            elif isinstance(command, SVGCommandLine):
                segment = BezierSegment.line(flip_vertical(command.start_pos.tolist()),
                                             flip_vertical(command.end_pos.tolist()))
                segments.append(segment)

        path = BezierPath(segments, *args, **kwargs)
        return path

    def to_svg_path(self):
        path_commands = []
        for segment in self.children:
            if segment.is_curved:
                command = SVGCommandBezier(Point(*flip_vertical(segment.p1)), Point(*flip_vertical(segment.q1)),
                                           Point(*flip_vertical(segment.q2)), Point(*flip_vertical(segment.p2)))
            else:
                command = SVGCommandLine(Point(*flip_vertical(segment.p1)), Point(*flip_vertical(segment.p2)))
            path_commands.append(command)
        svg_path = SVGPath(path_commands)
        return svg_path


class Sketch(Widget):
    color = ListProperty([1, 1, 1])
    points = ListProperty([])

    def __init__(self, points, color=None, **kwargs):
        super().__init__(**kwargs)

        if color is not None:
            self.color = color

        self.points = points

    def on_touch_move(self, touch):
        if touch.grab_current is not self:
            return super().on_touch_move(touch)

        self.points.extend(touch.pos)

    def on_touch_up(self, touch):
        if touch.grab_current is not self:
            return super().on_touch_up(touch)

        touch.ungrab(self)

        self.parent.on_sketch_done(self)

    def to_svg_path(self):
        points = [Point(x, 255 - y) for x, y in zip(self.points[::2], self.points[1::2])]
        commands = [SVGCommandMove(points[0])] + [SVGCommandLine(p1, p2) for p1, p2 in zip(points[:-1], points[1:])]
        svg_path = SVGPath.from_commands(commands).path
        return svg_path


class EditorView(Scatter):
    def on_touch_down(self, touch):
        if self.collide_point(*touch.pos) and touch.is_mouse_scrolling:
            if touch.button == 'scrolldown':
                if self.scale < 10:
                    self.scale = self.scale * 1.1
            elif touch.button == 'scrollup':
                if self.scale > 1:
                    self.scale = self.scale * 0.8
            return True

        return super().on_touch_down(touch)


class DrawViewbox(Widget):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        Window.bind(mouse_pos=self.on_mouse_pos)

    @property
    def nb_paths(self):
        return len(self.children)

    def _get_color(self, idx):
        color = color_dict[colors[idx % len(colors)]]
        return color

    def on_mouse_pos(self, _, abs_pos):
        pos = (Vector(abs_pos) - Vector(self.parent.pos)) / self.parent.scale

        if state.header.selected_tool == ToolMode.PEN and state.draw_mode == DrawMode.DRAW:
            segment = state.current_path.children[-1]
            segment.p2 = segment.q2 = pos

    def on_sketch_done(self, sketch: Sketch):
        # Digitize points to a Bézier path
        svg_path = preprocess_svg_path(sketch.to_svg_path(), force_smooth=True)

        path_idx = state.sidebar.nb_paths
        path = BezierPath.from_svg_path(svg_path, color=sketch.color, index=path_idx, selected=True)
        self.remove_widget(sketch)

        self.add_new_path(path, svg_path)

    def on_path_done(self, path: BezierPath):
        svg_path = preprocess_svg_path(path.to_svg_path())

        path_idx = state.sidebar.nb_paths
        new_path = BezierPath.from_svg_path(svg_path, color=path.color, index=path_idx, selected=True)
        self.remove_widget(path)

        self.add_new_path(new_path, svg_path)
|
458 |
+
|
459 |
+
def paste(self, path: BezierPath):
|
460 |
+
path = path.clone()
|
461 |
+
|
462 |
+
path_idx = state.sidebar.nb_paths
|
463 |
+
path.color = self._get_color(path_idx)
|
464 |
+
path.selected = True
|
465 |
+
|
466 |
+
svg_path = path.to_svg_path()
|
467 |
+
|
468 |
+
self.add_new_path(path, svg_path)
|
469 |
+
|
470 |
+
def unselect_all(self):
|
471 |
+
for path in self.children:
|
472 |
+
path.selected = False
|
473 |
+
|
474 |
+
def get_path(self, path_idx):
|
475 |
+
index = self.nb_paths - 1 - path_idx
|
476 |
+
return self.children[index]
|
477 |
+
|
478 |
+
def add_new_path(self, path: BezierSegment, svg_path: SVGPath):
|
479 |
+
self.add_path(path, svg_path, force_rerender_miniature=True)
|
480 |
+
|
481 |
+
state.modified = True
|
482 |
+
state.timeline.make_keyframe(True)
|
483 |
+
state.sidebar.select(path.index)
|
484 |
+
|
485 |
+
def add_path(self, path: BezierPath, svg_path: SVGPath, force_rerender_miniature=False):
|
486 |
+
path_idx = state.sidebar.nb_paths
|
487 |
+
self.add_widget(path)
|
488 |
+
|
489 |
+
miniature_path = os.path.join(state.project.cache_dir, f"{state.timeline.selected_frame}_{path_idx}.png")
|
490 |
+
if not os.path.exists(miniature_path) or force_rerender_miniature:
|
491 |
+
svg_path = normalized_path(svg_path)
|
492 |
+
svg_path.draw(viewbox=svg_path.bbox().make_square(min_size=12),
|
493 |
+
file_path=os.path.join(state.project.cache_dir, f"{state.timeline.selected_frame}_{path_idx}.png"),
|
494 |
+
do_display=False)
|
495 |
+
|
496 |
+
if not state.header.is_playing:
|
497 |
+
state.sidebar._add_path()
|
498 |
+
|
499 |
+
def on_touch_down(self, touch):
|
500 |
+
if state.header.selected_tool == ToolMode.PLAY:
|
501 |
+
return False
|
502 |
+
|
503 |
+
if state.header.selected_tool == ToolMode.PEN and self.collide_point(*touch.pos):
|
504 |
+
state.draw_mode = DrawMode.DRAW
|
505 |
+
|
506 |
+
if state.current_path is None:
|
507 |
+
path = BezierPath([], color=self._get_color(len(self.children)), selected=True)
|
508 |
+
self.add_widget(path)
|
509 |
+
state.current_path = path
|
510 |
+
|
511 |
+
l = BezierSegment.line(touch.pos, touch.pos)
|
512 |
+
|
513 |
+
touch.ud["selected"] = "q1"
|
514 |
+
touch.grab(l)
|
515 |
+
|
516 |
+
state.current_path.add_segment(l)
|
517 |
+
|
518 |
+
state.modified = True
|
519 |
+
|
520 |
+
return True
|
521 |
+
|
522 |
+
if state.header.selected_tool == ToolMode.PENCIL and self.collide_point(*touch.pos):
|
523 |
+
l = Sketch([*touch.pos], color=self._get_color(len(self.children)))
|
524 |
+
self.add_widget(l)
|
525 |
+
touch.grab(l)
|
526 |
+
|
527 |
+
state.modified = True
|
528 |
+
|
529 |
+
return True
|
530 |
+
|
531 |
+
if super().on_touch_down(touch):
|
532 |
+
return True
|
533 |
+
|
534 |
+
def clear(self):
|
535 |
+
state.draw_viewbox.clear_widgets()
|
536 |
+
state.sidebar.erase()
|
537 |
+
|
538 |
+
def add_widget(self, widget, index=0, canvas=None):
|
539 |
+
super().add_widget(widget, index=index, canvas=canvas)
|
540 |
+
|
541 |
+
def remove_widget(self, widget):
|
542 |
+
super().remove_widget(widget)
|
543 |
+
|
544 |
+
def to_svg(self):
|
545 |
+
svg_path_groups = []
|
546 |
+
for path in reversed(self.children):
|
547 |
+
svg_path_groups.append(path.to_svg_path().to_group())
|
548 |
+
|
549 |
+
svg = SVG(svg_path_groups, viewbox=Bbox(256))
|
550 |
+
return svg
|
551 |
+
|
552 |
+
def load_svg(self, svg: SVG, frame_idx):
|
553 |
+
kivy_bezierpaths = []
|
554 |
+
for idx, svg_path in enumerate(svg.paths):
|
555 |
+
path = BezierPath.from_svg_path(svg_path, color=self._get_color(idx), index=idx, selected=False)
|
556 |
+
kivy_bezierpaths.append(path)
|
557 |
+
self.add_path(path, svg_path, force_rerender_miniature=True)
|
558 |
+
|
559 |
+
state.project.frames[frame_idx].svg = svg
|
560 |
+
state.project.frames[frame_idx].kivy_bezierpaths = kivy_bezierpaths
|
561 |
+
|
562 |
+
def load_cached(self, svg: SVG, kivy_bezierpaths: List[BezierPath]):
|
563 |
+
for path, svg_path in zip(kivy_bezierpaths, svg.paths):
|
564 |
+
self.add_path(path, svg_path)
|
565 |
+
|
566 |
+
def load_frame(self, frame_idx):
|
567 |
+
svg = state.project.frames[frame_idx].svg
|
568 |
+
kivy_bezierpaths = state.project.frames[frame_idx].kivy_bezierpaths
|
569 |
+
|
570 |
+
if kivy_bezierpaths is None:
|
571 |
+
self.load_svg(svg, frame_idx)
|
572 |
+
else:
|
573 |
+
self.load_cached(svg, kivy_bezierpaths)
|
574 |
+
|
575 |
+
self.unselect_all()
|
576 |
+
|
577 |
+
def save_frame(self):
|
578 |
+
svg = self.to_svg()
|
579 |
+
state.project.frames[state.current_frame].svg = svg
|
580 |
+
state.project.frames[state.current_frame].kivy_bezierpaths = [child for child in reversed(self.children) if isinstance(child, BezierPath)]
|
581 |
+
|
582 |
+
|
583 |
+
class HeaderButton(Button):
|
584 |
+
pass
|
585 |
+
|
586 |
+
|
587 |
+
class UpButton(Button):
|
588 |
+
def on_press(self):
|
589 |
+
self.parent.move_up()
|
590 |
+
|
591 |
+
|
592 |
+
class DownButton(Button):
|
593 |
+
def on_press(self):
|
594 |
+
self.parent.move_down()
|
595 |
+
|
596 |
+
|
597 |
+
class ReverseButton(Button):
|
598 |
+
def on_press(self):
|
599 |
+
self.parent.reverse()
|
600 |
+
|
601 |
+
|
602 |
+
class FrameView(Button):
|
603 |
+
index = NumericProperty(0)
|
604 |
+
keyframe = BooleanProperty(False)
|
605 |
+
|
606 |
+
def __init__(self, index, keyframe=False, **kwargs):
|
607 |
+
super().__init__(**kwargs)
|
608 |
+
|
609 |
+
self.index = index
|
610 |
+
self.keyframe = keyframe
|
611 |
+
|
612 |
+
def on_press(self):
|
613 |
+
state.timeline.selected_frame = self.index
|
614 |
+
|
615 |
+
|
616 |
+
class TimeLine(ScrollView):
|
617 |
+
selected_frame = NumericProperty(-1)
|
618 |
+
|
619 |
+
@property
|
620 |
+
def timeline(self):
|
621 |
+
return self.ids.timeline
|
622 |
+
|
623 |
+
@property
|
624 |
+
def nb_frames(self):
|
625 |
+
return len(self.timeline.children)
|
626 |
+
|
627 |
+
def on_selected_frame(self, *args):
|
628 |
+
self._update_frame(self.selected_frame)
|
629 |
+
|
630 |
+
def _update_frame(self, new_frame_idx):
|
631 |
+
if state.current_frame >= 0 and state.modified:
|
632 |
+
state.draw_viewbox.save_frame()
|
633 |
+
|
634 |
+
state.current_frame = new_frame_idx
|
635 |
+
state.draw_viewbox.clear()
|
636 |
+
state.modified = False
|
637 |
+
|
638 |
+
state.draw_viewbox.load_frame(new_frame_idx)
|
639 |
+
|
640 |
+
def _add_frame(self, keyframe=False):
|
641 |
+
idx = self.nb_frames
|
642 |
+
new_frame = FrameView(idx, keyframe=keyframe)
|
643 |
+
|
644 |
+
self.timeline.add_widget(new_frame)
|
645 |
+
return idx
|
646 |
+
|
647 |
+
def get_frame(self, frame_idx):
|
648 |
+
index = self.nb_frames - 1 - frame_idx
|
649 |
+
return self.timeline.children[index]
|
650 |
+
|
651 |
+
def make_keyframe(self, is_keyframe=None):
|
652 |
+
if is_keyframe is None:
|
653 |
+
is_keyframe = not self.get_frame(state.timeline.selected_frame).keyframe
|
654 |
+
|
655 |
+
self.get_frame(state.timeline.selected_frame).keyframe = is_keyframe
|
656 |
+
state.project.frames[state.timeline.selected_frame].keyframe = is_keyframe
|
657 |
+
|
658 |
+
|
659 |
+
class TitleWidget(AlignedTextInput):
|
660 |
+
pass
|
661 |
+
|
662 |
+
|
663 |
+
class Padding(Label):
|
664 |
+
pass
|
665 |
+
|
666 |
+
|
667 |
+
class FileChoosePopup(Popup):
|
668 |
+
load = ObjectProperty()
|
669 |
+
path = StringProperty(".")
|
670 |
+
|
671 |
+
|
672 |
+
class DeepSVGWidget(BoxLayout):
|
673 |
+
def __init__(self, **kwargs):
|
674 |
+
super().__init__(**kwargs)
|
675 |
+
|
676 |
+
state.main_widget = self
|
677 |
+
state.header = self.ids.header
|
678 |
+
state.sidebar = self.ids.sidebar_scroll
|
679 |
+
state.draw_viewbox = self.ids.editor.ids.draw_viewbox
|
680 |
+
state.timeline = self.ids.timeline_scroll
|
681 |
+
|
682 |
+
self._load_project()
|
683 |
+
|
684 |
+
def _load_project(self):
|
685 |
+
for frame in state.project.frames:
|
686 |
+
state.timeline._add_frame(keyframe=frame.keyframe)
|
687 |
+
|
688 |
+
state.timeline.selected_frame = 0
|
689 |
+
|
690 |
+
|
691 |
+
class DeepSVGApp(App):
|
692 |
+
def build(self):
|
693 |
+
self.title = 'DeepSVG Editor'
|
694 |
+
|
695 |
+
Window.bind(on_request_close=self.on_request_close)
|
696 |
+
Window.bind(on_keyboard=self.on_keyboard)
|
697 |
+
|
698 |
+
return DeepSVGWidget()
|
699 |
+
|
700 |
+
def save(self):
|
701 |
+
state.draw_viewbox.save_frame()
|
702 |
+
|
703 |
+
state.save_state()
|
704 |
+
state.project.save_project()
|
705 |
+
|
706 |
+
def on_request_close(self, *args, **kwargs):
|
707 |
+
self.save()
|
708 |
+
|
709 |
+
self.stop()
|
710 |
+
|
711 |
+
def on_keyboard(self, window, key, scancode, codepoint, modifier):
|
712 |
+
CTRL_PRESSED = (modifier == ['ctrl'] or modifier == ['meta'])
|
713 |
+
|
714 |
+
if codepoint == "h" and not CTRL_PRESSED:
|
715 |
+
# Hand tool
|
716 |
+
state.header.selected_tool = ToolMode.MOVE
|
717 |
+
|
718 |
+
elif codepoint == "p" and not CTRL_PRESSED:
|
719 |
+
# Pen tool
|
720 |
+
state.header.selected_tool = ToolMode.PEN
|
721 |
+
|
722 |
+
elif CTRL_PRESSED and codepoint == "p":
|
723 |
+
# Pencil tool
|
724 |
+
state.header.selected_tool = ToolMode.PENCIL
|
725 |
+
|
726 |
+
elif codepoint == "k" and not CTRL_PRESSED:
|
727 |
+
# Make keypoint
|
728 |
+
state.timeline.make_keyframe()
|
729 |
+
|
730 |
+
elif CTRL_PRESSED and codepoint == 'q':
|
731 |
+
# Quit
|
732 |
+
self.on_request_close()
|
733 |
+
|
734 |
+
elif CTRL_PRESSED and codepoint == 'i':
|
735 |
+
# Import
|
736 |
+
self.file_chooser = FileChoosePopup(load=self.on_file_chosen)
|
737 |
+
self.file_chooser.open()
|
738 |
+
|
739 |
+
elif CTRL_PRESSED and codepoint == "e":
|
740 |
+
# Export
|
741 |
+
state.project.export_to_gif(loop_mode=state.loop_mode)
|
742 |
+
|
743 |
+
elif CTRL_PRESSED and codepoint == 'c':
|
744 |
+
# Copy
|
745 |
+
if state.sidebar.selected_path_idx >= 0:
|
746 |
+
state.clipboard = state.draw_viewbox.get_path(state.sidebar.selected_path_idx).clone()
|
747 |
+
|
748 |
+
elif CTRL_PRESSED and codepoint == 'v':
|
749 |
+
# Paste
|
750 |
+
if isinstance(state.clipboard, BezierPath):
|
751 |
+
state.draw_viewbox.paste(state.clipboard)
|
752 |
+
|
753 |
+
elif CTRL_PRESSED and codepoint == 's':
|
754 |
+
# Save
|
755 |
+
self.save()
|
756 |
+
|
757 |
+
elif key == Keys.SPACEBAR:
|
758 |
+
# Play/Pause
|
759 |
+
state.header.selected_tool = ToolMode.PLAY
|
760 |
+
|
761 |
+
if state.header.is_playing:
|
762 |
+
state.header.pause_animation()
|
763 |
+
else:
|
764 |
+
state.header.play_animation()
|
765 |
+
|
766 |
+
elif key == Keys.LEFT:
|
767 |
+
# Previous frame
|
768 |
+
if state.current_frame > 0:
|
769 |
+
state.timeline.selected_frame = state.current_frame - 1
|
770 |
+
|
771 |
+
elif key == Keys.RIGHT:
|
772 |
+
# Next frame
|
773 |
+
if state.current_frame < state.timeline.nb_frames - 1:
|
774 |
+
state.timeline.selected_frame = state.current_frame + 1
|
775 |
+
|
776 |
+
def on_file_chosen(self, selection):
|
777 |
+
file_path = str(selection[0])
|
778 |
+
self.file_chooser.dismiss()
|
779 |
+
|
780 |
+
if file_path:
|
781 |
+
if not file_path.endswith(".svg"):
|
782 |
+
return
|
783 |
+
|
784 |
+
svg = SVG.load_svg(file_path)
|
785 |
+
svg = SVGTensorDataset.simplify(svg)
|
786 |
+
svg = SVGTensorDataset.preprocess(svg, mean=True)
|
787 |
+
|
788 |
+
state.draw_viewbox.load_svg(svg, frame_idx=state.timeline.selected_frame)
|
789 |
+
state.modified = True
|
790 |
+
state.timeline.make_keyframe(True)
|
791 |
+
|
792 |
+
|
793 |
+
if __name__ == "__main__":
|
794 |
+
DeepSVGApp().run()
|
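
The `on_keyboard` handler above maps h, p, Ctrl+P and k to the hand, pen, pencil and keyframe tools, Ctrl+Q/I/E/C/V/S to quit, import, export, copy, paste and save, and Space/Left/Right to playback and frame navigation. For reference, a minimal sketch of launching the editor from another script, assuming Kivy and the deepsvg package are importable:

from deepsvg.gui.main import DeepSVGApp

app = DeepSVGApp()   # build() creates DeepSVGWidget and binds the keyboard/close handlers
app.run()            # blocks until Ctrl+Q or a window close, which triggers save()
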
deepsvg/gui/res/down.png
ADDED
deepsvg/gui/res/hand.png
ADDED
deepsvg/gui/res/hand.svg
ADDED
deepsvg/gui/res/pause.png
ADDED
deepsvg/gui/res/pen.png
ADDED
deepsvg/gui/res/pen.svg
ADDED
deepsvg/gui/res/pencil.png
ADDED
deepsvg/gui/res/pencil.svg
ADDED
deepsvg/gui/res/play.png
ADDED
deepsvg/gui/res/play.svg
ADDED
deepsvg/gui/res/switch.png
ADDED
deepsvg/gui/res/up.png
ADDED
deepsvg/gui/state/__init__.py
ADDED
File without changes
deepsvg/gui/state/project.py
ADDED
@@ -0,0 +1,115 @@
import os
import uuid
import json
import numpy as np
from moviepy.editor import ImageClip, concatenate_videoclips
import shutil

from deepsvg.svglib.svg import SVG
from deepsvg.svglib.geom import Bbox

from ..config import ROOT_DIR


class Frame:
    def __init__(self, index, keyframe=False, svg=None):
        self.index = index
        self.keyframe = keyframe

        if svg is None:
            svg = SVG([], viewbox=Bbox(256))
        self.svg = svg

        self.kivy_bezierpaths = None

    def to_dict(self):
        return {
            "index": self.index,
            "keyframe": self.keyframe
        }

    @staticmethod
    def load_dict(frame):
        f = Frame(frame["index"], frame["keyframe"])
        return f


class DeepSVGProject:
    def __init__(self, name="Title"):
        self.name = name
        self.uid = str(uuid.uuid4())

        self.frames = [Frame(index=0)]

    @property
    def filename(self):
        return os.path.join(ROOT_DIR, f"{self.uid}.json")

    @property
    def base_dir(self):
        base_dir = os.path.join(ROOT_DIR, self.uid)

        if not os.path.exists(base_dir):
            os.makedirs(base_dir)

        return base_dir

    @property
    def cache_dir(self):
        cache_dir = os.path.join(self.base_dir, "cache")

        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)

        return cache_dir

    def load_project(self, file_path):
        with open(file_path, "r") as f:
            data = json.load(f)

        self.name = data["name"]
        self.uid = data["uid"]

        self.load_frames(data["frames"])

        shutil.rmtree(self.cache_dir)

    def load_frames(self, frames):
        self.frames = [Frame.load_dict(frame) for frame in frames]

        for frame in self.frames:
            frame.svg = SVG.load_svg(os.path.join(self.base_dir, f"{frame.index}.svg"))

    def save_project(self):
        with open(self.filename, "w") as f:
            data = {
                "name": self.name,
                "uid": self.uid,

                "frames": [frame.to_dict() for frame in self.frames]
            }

            json.dump(data, f)

        self.save_frames()

    def save_frames(self):
        for frame in self.frames:
            frame.svg.save_svg(os.path.join(self.base_dir, f"{frame.index}.svg"))

    def export_to_gif(self, frame_duration=0.1, loop_mode=0):
        from .state import LoopMode

        imgs = [frame.svg.copy().normalize().draw(do_display=False, return_png=True) for frame in self.frames]

        if loop_mode == LoopMode.REVERSE:
            imgs = imgs[::-1]
        elif loop_mode == LoopMode.PINGPONG:
            imgs = imgs + imgs[::-1]

        clips = [ImageClip(np.array(img)).set_duration(frame_duration) for img in imgs]

        clip = concatenate_videoclips(clips, method="compose", bg_color=(255, 255, 255))

        file_path = os.path.join(ROOT_DIR, f"{self.uid}.gif")
        clip.write_gif(file_path, fps=24, verbose=False, logger=None)
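
A minimal round-trip sketch of the project API above, assuming `ROOT_DIR` in `deepsvg/gui/config.py` points to a writable directory and moviepy is installed:

from deepsvg.gui.state.project import DeepSVGProject, Frame
from deepsvg.gui.state.state import LoopMode

project = DeepSVGProject(name="demo")
project.frames.append(Frame(index=1, keyframe=True))

project.save_project()                  # writes <uid>.json plus one <index>.svg per frame

restored = DeepSVGProject()
restored.load_project(project.filename)

restored.export_to_gif(loop_mode=LoopMode.PINGPONG)   # <uid>.gif, frames played forward then backward
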
deepsvg/gui/state/state.py
ADDED
@@ -0,0 +1,78 @@
from .project import DeepSVGProject
from ..config import STATE_PATH
import pickle
import os


class ToolMode:
    MOVE = 0
    PEN = 1
    PENCIL = 2
    PLAY = 3


class DrawMode:
    STILL = 0
    DRAW = 1
    HOLDING_DOWN = 2


class LoopMode:
    NORMAL = 0
    REVERSE = 1
    PINGPONG = 2


class PlaybackMode:
    NORMAL = 0
    EASE = 1


class LoopOrientation:
    FORWARD = 1
    BACKWARD = -1


class State:
    def __init__(self):
        self.project_file = None
        self.project = DeepSVGProject()

        self.loop_mode = LoopMode.PINGPONG
        self.loop_orientation = LoopOrientation.FORWARD
        self.playback_mode = PlaybackMode.EASE

        self.delay = 1 / 10.

        self.modified = False

        # Keep track of the previously selected current_frame, separately from the timeline's selected_frame attribute
        self.current_frame = -1

        self.current_path = None
        self.draw_mode = DrawMode.STILL

        self.clipboard = None

        # UI references
        self.main_widget = None
        self.header = None
        self.sidebar = None
        self.draw_viewbox = None
        self.timeline = None

    def save_state(self):
        with open(STATE_PATH, "wb") as f:
            state_dict = {k: v for k, v in self.__dict__.items() if k in ["project_file"]}
            pickle.dump(state_dict, f)

    def load_state(self):
        if os.path.exists(STATE_PATH):
            with open(STATE_PATH, "rb") as f:
                self.__dict__.update(pickle.load(f))

    def load_project(self):
        if self.project_file is not None:
            self.project.load_project(self.project_file)
        else:
            self.project_file = self.project.filename
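
Note that `save_state` deliberately pickles only `project_file`; everything else (tool, playback mode, clipboard) is session-local. A short sketch of the restart flow, assuming `STATE_PATH` in `deepsvg/gui/config.py` is configured:

from deepsvg.gui.state.state import State

state = State()
state.load_state()      # restores project_file from STATE_PATH, if the file exists
state.load_project()    # loads that project, or registers the fresh project's filename

state.save_state()      # persists only {"project_file": ...}
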
deepsvg/gui/utils.py
ADDED
@@ -0,0 +1,66 @@
from deepsvg.svglib.svg import SVG
from deepsvg.svglib.svg_path import SVGPath
from deepsvg.svglib.geom import Bbox


color_dict = {
    "deepskyblue": [0., 0.69, 0.97],
    "lime": [0.02, 1., 0.01],
    "deeppink": [1., 0.07, 0.53],
    "gold": [1., 0.81, 0.01],
    "coral": [1., 0.45, 0.27],
    "darkviolet": [0.53, 0.01, 0.8],
    "royalblue": [0.21, 0.36, 0.86],
    "darkmagenta": [0.5, 0., 0.5],
    "teal": [0., 0.45, 0.45],
    "green": [0., 0.45, 0.],
    "maroon": [0.45, 0., 0.],
    "aqua": [0., 1., 1.],
    "grey": [0.45, 0.45, 0.45],
    "steelblue": [0.24, 0.46, 0.67],
    "orange": [1., 0.6, 0.01]
}

colors = ["deepskyblue", "lime", "deeppink", "gold", "coral", "darkviolet", "royalblue", "darkmagenta", "teal",
          "gold", "green", "maroon", "aqua", "grey", "steelblue", "lime", "orange"]


class Keys:
    LEFT = 276
    UP = 273
    RIGHT = 275
    DOWN = 274

    SPACEBAR = 32


def dist(a, b):
    return ((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) ** .5


def preprocess_svg_path(svg_path: SVGPath, force_smooth=False):
    svg = SVG([svg_path.to_group()], viewbox=Bbox(256)).normalize()
    svg.canonicalize()
    svg.filter_duplicates()
    svg = svg.simplify_heuristic(force_smooth=force_smooth)
    svg.normalize()
    svg.numericalize(256)

    return svg[0].path


def normalized_path(svg_path):
    svg = SVG([svg_path.copy().to_group()], viewbox=Bbox(256)).normalize()
    return svg[0].path


def flip_vertical(p):
    return [p[0], 255 - p[1]]


def easein_easeout(t):
    return t * t / (2. * (t * t - t) + 1.)


def d_easein_easeout(t):
    return 3 * (1 - t) * t / (2 * t * t - 2 * t + 1) ** 2
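
`easein_easeout` is the smoothstep-like rational curve used for eased playback, and `d_easein_easeout` is its derivative; a quick numerical check of the helpers above:

from deepsvg.gui.utils import dist, easein_easeout, d_easein_easeout

print(dist((0, 0), (3, 4)))       # 5.0
print(easein_easeout(0.0))        # 0.0 (starts at rest)
print(easein_easeout(0.5))        # 0.5 (symmetric around the midpoint)
print(easein_easeout(1.0))        # 1.0
print(d_easein_easeout(0.0))      # 0.0 (zero velocity at both endpoints)
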
deepsvg/model/basic_blocks.py
ADDED
@@ -0,0 +1,101 @@
import torch
import torch.nn as nn


class FCN(nn.Module):
    def __init__(self, d_model, n_commands, n_args, args_dim=256, abs_targets=False):
        super().__init__()

        self.n_args = n_args
        self.args_dim = args_dim
        self.abs_targets = abs_targets

        self.command_fcn = nn.Linear(d_model, n_commands)

        if abs_targets:
            self.args_fcn = nn.Linear(d_model, n_args)
        else:
            self.args_fcn = nn.Linear(d_model, n_args * args_dim)

    def forward(self, out):
        S, N, _ = out.shape

        command_logits = self.command_fcn(out)  # Shape [S, N, n_commands]
        args_logits = self.args_fcn(out)  # Shape [S, N, n_args * args_dim]

        if not self.abs_targets:
            args_logits = args_logits.reshape(S, N, self.n_args, self.args_dim)  # Shape [S, N, n_args, args_dim]

        return command_logits, args_logits


class ArgumentFCN(nn.Module):
    def __init__(self, d_model, n_args, args_dim=256, abs_targets=False):
        super().__init__()

        self.n_args = n_args
        self.args_dim = args_dim
        self.abs_targets = abs_targets

        # classification -> regression
        if abs_targets:
            self.args_fcn = nn.Sequential(
                nn.Linear(d_model, n_args * args_dim),
                nn.Linear(n_args * args_dim, n_args)
            )
        else:
            self.args_fcn = nn.Linear(d_model, n_args * args_dim)

    def forward(self, out):
        S, N, _ = out.shape

        args_logits = self.args_fcn(out)  # Shape [S, N, n_args * args_dim]

        if not self.abs_targets:
            args_logits = args_logits.reshape(S, N, self.n_args, self.args_dim)  # Shape [S, N, n_args, args_dim]

        return args_logits


class HierarchFCN(nn.Module):
    def __init__(self, d_model, dim_z):
        super().__init__()

        # self.visibility_fcn = nn.Linear(d_model, 2)
        # self.z_fcn = nn.Linear(d_model, dim_z)
        self.visibility_fcn = nn.Linear(dim_z, 2)
        self.z_fcn = nn.Linear(dim_z, dim_z)

    def forward(self, out):
        G, N, _ = out.shape

        visibility_logits = self.visibility_fcn(out)  # Shape [G, N, 2]
        z = self.z_fcn(out)  # Shape [G, N, dim_z]

        return visibility_logits.unsqueeze(0), z.unsqueeze(0)


class ResNet(nn.Module):
    def __init__(self, d_model):
        super().__init__()

        self.linear1 = nn.Sequential(
            nn.Linear(d_model, d_model), nn.ReLU()
        )
        self.linear2 = nn.Sequential(
            nn.Linear(d_model, d_model), nn.ReLU()
        )
        self.linear3 = nn.Sequential(
            nn.Linear(d_model, d_model), nn.ReLU()
        )
        self.linear4 = nn.Sequential(
            nn.Linear(d_model, d_model), nn.ReLU()
        )

    def forward(self, z):
        z = z + self.linear1(z)
        z = z + self.linear2(z)
        z = z + self.linear3(z)
        z = z + self.linear4(z)

        return z
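
A shape sanity check for `FCN` under its default classification head (`abs_targets=False`); the dimensions below are illustrative, not the model defaults:

import torch
from deepsvg.model.basic_blocks import FCN

S, N = 30, 2                              # sequence length, batch size
fcn = FCN(d_model=256, n_commands=7, n_args=11)

out = torch.randn(S, N, 256)              # Transformer decoder output
command_logits, args_logits = fcn(out)

print(command_logits.shape)               # torch.Size([30, 2, 7])
print(args_logits.shape)                  # torch.Size([30, 2, 11, 256]), one 256-way bin per argument
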
deepsvg/model/config.py
ADDED
@@ -0,0 +1,107 @@
from deepsvg.difflib.tensor import SVGTensor


class _DefaultConfig:
    """
    Model config.
    """
    def __init__(self):
        self.args_dim = 256              # Coordinate numericalization, default: 256 (8-bit)
        self.n_args = 11                 # Tensor nb of arguments, default: 11 (rx,ry,phi,fA,fS,qx1,qy1,qx2,qy2,x1,x2)
        self.n_commands = len(SVGTensor.COMMANDS_SIMPLIFIED)  # m, l, c, a, EOS, SOS, z

        self.dropout = 0.1               # Dropout rate used in basic layers and Transformers

        self.model_type = "transformer"  # "transformer" ("lstm" implementation is work in progress)

        self.encode_stages = 1           # One-stage or two-stage: 1 | 2
        self.decode_stages = 1           # One-stage or two-stage: 1 | 2

        self.use_resnet = True           # Use extra fully-connected residual blocks after Encoder

        self.use_vae = True              # Sample latent vector (with reparametrization trick) or use encodings directly

        self.pred_mode = "one_shot"      # Feed-forward (one-shot) or autoregressive: "one_shot" | "autoregressive"
        self.rel_targets = False         # Predict coordinates in relative or absolute format

        self.label_condition = False     # Make all blocks conditional on the label
        self.n_labels = 100              # Number of labels (when used)
        self.dim_label = 64              # Label embedding dimensionality

        self.self_match = False          # Use Hungarian (self-match) or Ordered assignment

        self.n_layers = 4                # Number of Encoder blocks
        self.n_layers_decode = 4         # Number of Decoder blocks
        self.n_heads = 8                 # Transformer config: number of heads
        self.dim_feedforward = 512       # Transformer config: FF dimensionality
        self.d_model = 256               # Transformer config: model dimensionality

        self.dim_z = 256                 # Latent vector dimensionality

        self.max_num_groups = 8          # Number of paths (N_P)
        self.max_seq_len = 30            # Number of commands (N_C)
        self.max_total_len = self.max_num_groups * self.max_seq_len  # Concatenated sequence length for baselines

        self.num_groups_proposal = self.max_num_groups  # Number of predicted paths, default: N_P

    def get_model_args(self):
        model_args = []

        model_args += ["commands_grouped", "args_grouped"] if self.encode_stages <= 1 else ["commands", "args"]

        if self.rel_targets:
            model_args += ["commands_grouped", "args_rel_grouped"] if self.decode_stages == 1 else ["commands", "args_rel"]
        else:
            model_args += ["commands_grouped", "args_grouped"] if self.decode_stages == 1 else ["commands", "args"]

        if self.label_condition:
            model_args.append("label")

        return model_args


class SketchRNN(_DefaultConfig):
    # LSTM - Autoregressive - One-stage
    def __init__(self):
        super().__init__()

        self.model_type = "lstm"

        self.pred_mode = "autoregressive"
        self.rel_targets = True


class Sketchformer(_DefaultConfig):
    # Transformer - Autoregressive - One-stage
    def __init__(self):
        super().__init__()

        self.pred_mode = "autoregressive"
        self.rel_targets = True


class OneStageOneShot(_DefaultConfig):
    # Transformer - One-shot - One-stage
    def __init__(self):
        super().__init__()

        self.encode_stages = 1
        self.decode_stages = 1


class Hierarchical(_DefaultConfig):
    # Transformer - One-shot - Two-stage - Ordered
    def __init__(self):
        super().__init__()

        self.encode_stages = 2
        self.decode_stages = 2


class HierarchicalSelfMatching(_DefaultConfig):
    # Transformer - One-shot - Two-stage - Hungarian
    def __init__(self):
        super().__init__()
        self.encode_stages = 2
        self.decode_stages = 2
        self.self_match = True
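
For instance, the two-stage `Hierarchical` config resolves its model arguments as follows (a quick check, assuming the deepsvg package is on the path):

from deepsvg.model.config import Hierarchical

cfg = Hierarchical()
print(cfg.encode_stages, cfg.decode_stages)   # 2 2
print(cfg.get_model_args())
# ['commands', 'args', 'commands', 'args']  (encoder inputs, then decoder targets)
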
deepsvg/model/layers/__init__.py
ADDED
File without changes
deepsvg/model/layers/attention.py
ADDED
@@ -0,0 +1,161 @@
import torch
from torch.nn import Linear
from torch.nn.init import xavier_uniform_
from torch.nn.init import constant_
from torch.nn.init import xavier_normal_
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

from .functional import multi_head_attention_forward


class MultiheadAttention(Module):
    r"""Allows the model to jointly attend to information
    from different representation subspaces.
    See reference: Attention Is All You Need

    .. math::
        \text{MultiHead}(Q, K, V) = \text{Concat}(head_1, \dots, head_h) W^O
        \quad \text{where} \quad head_i = \text{Attention}(Q W_i^Q, K W_i^K, V W_i^V)

    Args:
        embed_dim: total dimension of the model.
        num_heads: parallel attention heads.
        dropout: a Dropout layer on attn_output_weights. Default: 0.0.
        bias: add bias as module parameter. Default: True.
        add_bias_kv: add bias to the key and value sequences at dim=0.
        add_zero_attn: add a new batch of zeros to the key and
                       value sequences at dim=1.
        kdim: total number of features in key. Default: None.
        vdim: total number of features in value. Default: None.

    Note: if kdim and vdim are None, they will be set to embed_dim such that
    query, key, and value have the same number of features.

    Examples::

        >>> multihead_attn = nn.MultiheadAttention(embed_dim, num_heads)
        >>> attn_output, attn_output_weights = multihead_attn(query, key, value)
    """
    __annotations__ = {
        'bias_k': torch._jit_internal.Optional[torch.Tensor],
        'bias_v': torch._jit_internal.Optional[torch.Tensor],
    }
    __constants__ = ['q_proj_weight', 'k_proj_weight', 'v_proj_weight', 'in_proj_weight']

    def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None):
        super(MultiheadAttention, self).__init__()
        self.embed_dim = embed_dim
        self.kdim = kdim if kdim is not None else embed_dim
        self.vdim = vdim if vdim is not None else embed_dim
        self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim

        self.num_heads = num_heads
        self.dropout = dropout
        self.head_dim = embed_dim // num_heads
        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"

        if self._qkv_same_embed_dim is False:
            self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))
            self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))
            self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))
            self.register_parameter('in_proj_weight', None)
        else:
            self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim))
            self.register_parameter('q_proj_weight', None)
            self.register_parameter('k_proj_weight', None)
            self.register_parameter('v_proj_weight', None)

        if bias:
            self.in_proj_bias = Parameter(torch.empty(3 * embed_dim))
        else:
            self.register_parameter('in_proj_bias', None)
        self.out_proj = Linear(embed_dim, embed_dim, bias=bias)

        if add_bias_kv:
            self.bias_k = Parameter(torch.empty(1, 1, embed_dim))
            self.bias_v = Parameter(torch.empty(1, 1, embed_dim))
        else:
            self.bias_k = self.bias_v = None

        self.add_zero_attn = add_zero_attn

        self._reset_parameters()

    def _reset_parameters(self):
        if self._qkv_same_embed_dim:
            xavier_uniform_(self.in_proj_weight)
        else:
            xavier_uniform_(self.q_proj_weight)
            xavier_uniform_(self.k_proj_weight)
            xavier_uniform_(self.v_proj_weight)

        if self.in_proj_bias is not None:
            constant_(self.in_proj_bias, 0.)
            constant_(self.out_proj.bias, 0.)
        if self.bias_k is not None:
            xavier_normal_(self.bias_k)
        if self.bias_v is not None:
            xavier_normal_(self.bias_v)

    def __setstate__(self, state):
        # Support loading old MultiheadAttention checkpoints generated by v1.1.0
        if '_qkv_same_embed_dim' not in state:
            state['_qkv_same_embed_dim'] = True

        super(MultiheadAttention, self).__setstate__(state)

    def forward(self, query, key, value, key_padding_mask=None,
                need_weights=True, attn_mask=None):
        # type: (Tensor, Tensor, Tensor, Optional[Tensor], bool, Optional[Tensor]) -> Tuple[Tensor, Optional[Tensor]]
        r"""
        Args:
            query, key, value: map a query and a set of key-value pairs to an output.
                See "Attention Is All You Need" for more details.
            key_padding_mask: if provided, specified padding elements in the key will
                be ignored by the attention. This is a binary mask. When the value is True,
                the corresponding value on the attention layer will be filled with -inf.
            need_weights: output attn_output_weights.
            attn_mask: 2D or 3D mask that prevents attention to certain positions. This is an additive mask
                (i.e. the values will be added to the attention layer). A 2D mask will be broadcasted for all
                the batches while a 3D mask allows to specify a different mask for the entries of each batch.

        Shape:
            - Inputs:
            - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is
              the embedding dimension.
            - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is
              the embedding dimension.
            - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is
              the embedding dimension.
            - key_padding_mask: :math:`(N, S)`, ByteTensor, where N is the batch size, S is the source sequence length.
            - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length.
              3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length,
              S is the source sequence length.

            - Outputs:
            - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,
              E is the embedding dimension.
            - attn_output_weights: :math:`(N, L, S)` where N is the batch size,
              L is the target sequence length, S is the source sequence length.
        """
        if not self._qkv_same_embed_dim:
            return multi_head_attention_forward(
                query, key, value, self.embed_dim, self.num_heads,
                self.in_proj_weight, self.in_proj_bias,
                self.bias_k, self.bias_v, self.add_zero_attn,
                self.dropout, self.out_proj.weight, self.out_proj.bias,
                training=self.training,
                key_padding_mask=key_padding_mask, need_weights=need_weights,
                attn_mask=attn_mask, use_separate_proj_weight=True,
                q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight,
                v_proj_weight=self.v_proj_weight)
        else:
            return multi_head_attention_forward(
                query, key, value, self.embed_dim, self.num_heads,
                self.in_proj_weight, self.in_proj_bias,
                self.bias_k, self.bias_v, self.add_zero_attn,
                self.dropout, self.out_proj.weight, self.out_proj.bias,
                training=self.training,
                key_padding_mask=key_padding_mask, need_weights=need_weights,
                attn_mask=attn_mask)
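
This mirrors `torch.nn.MultiheadAttention` but routes through the local `multi_head_attention_forward`; a minimal usage sketch with the (L, N, E) layout from the docstring:

import torch
from deepsvg.model.layers.attention import MultiheadAttention

embed_dim, num_heads = 256, 8
attn = MultiheadAttention(embed_dim, num_heads)

L, S, N = 10, 20, 4                        # target len, source len, batch size
query = torch.randn(L, N, embed_dim)
key = torch.randn(S, N, embed_dim)
value = torch.randn(S, N, embed_dim)

attn_output, attn_weights = attn(query, key, value)
print(attn_output.shape)                   # torch.Size([10, 4, 256])
print(attn_weights.shape)                  # torch.Size([4, 10, 20]), averaged over heads
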
deepsvg/model/layers/functional.py
ADDED
@@ -0,0 +1,256 @@
from __future__ import division


import torch
import torch.nn.functional as F


def multi_head_attention_forward(query,                            # type: Tensor
                                 key,                              # type: Tensor
                                 value,                            # type: Tensor
                                 embed_dim_to_check,               # type: int
                                 num_heads,                        # type: int
                                 in_proj_weight,                   # type: Tensor
                                 in_proj_bias,                     # type: Tensor
                                 bias_k,                           # type: Optional[Tensor]
                                 bias_v,                           # type: Optional[Tensor]
                                 add_zero_attn,                    # type: bool
                                 dropout_p,                        # type: float
                                 out_proj_weight,                  # type: Tensor
                                 out_proj_bias,                    # type: Tensor
                                 training=True,                    # type: bool
                                 key_padding_mask=None,            # type: Optional[Tensor]
                                 need_weights=True,                # type: bool
                                 attn_mask=None,                   # type: Optional[Tensor]
                                 use_separate_proj_weight=False,   # type: bool
                                 q_proj_weight=None,               # type: Optional[Tensor]
                                 k_proj_weight=None,               # type: Optional[Tensor]
                                 v_proj_weight=None,               # type: Optional[Tensor]
                                 static_k=None,                    # type: Optional[Tensor]
                                 static_v=None                     # type: Optional[Tensor]
                                 ):
    # type: (...) -> Tuple[Tensor, Optional[Tensor]]
    r"""
    Args:
        query, key, value: map a query and a set of key-value pairs to an output.
            See "Attention Is All You Need" for more details.
        embed_dim_to_check: total dimension of the model.
        num_heads: parallel attention heads.
        in_proj_weight, in_proj_bias: input projection weight and bias.
        bias_k, bias_v: bias of the key and value sequences to be added at dim=0.
        add_zero_attn: add a new batch of zeros to the key and
                       value sequences at dim=1.
        dropout_p: probability of an element to be zeroed.
        out_proj_weight, out_proj_bias: the output projection weight and bias.
        training: apply dropout if is ``True``.
        key_padding_mask: if provided, specified padding elements in the key will
            be ignored by the attention. This is a binary mask. When the value is True,
            the corresponding value on the attention layer will be filled with -inf.
        need_weights: output attn_output_weights.
        attn_mask: 2D or 3D mask that prevents attention to certain positions. This is an additive mask
            (i.e. the values will be added to the attention layer). A 2D mask will be broadcasted for all
            the batches while a 3D mask allows to specify a different mask for the entries of each batch.
        use_separate_proj_weight: the function accepts the proj. weights for query, key,
            and value in different forms. If false, in_proj_weight will be used, which is
            a combination of q_proj_weight, k_proj_weight, v_proj_weight.
        q_proj_weight, k_proj_weight, v_proj_weight, in_proj_bias: input projection weight and bias.
        static_k, static_v: static key and value used for attention operators.
    Shape:
        Inputs:
        - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is
          the embedding dimension.
        - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is
          the embedding dimension.
        - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is
          the embedding dimension.
        - key_padding_mask: :math:`(N, S)`, ByteTensor, where N is the batch size, S is the source sequence length.
        - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length.
          3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length,
          S is the source sequence length.
        - static_k: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length,
          N is the batch size, E is the embedding dimension. E/num_heads is the head dimension.
        - static_v: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length,
          N is the batch size, E is the embedding dimension. E/num_heads is the head dimension.
        Outputs:
        - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,
          E is the embedding dimension.
        - attn_output_weights: :math:`(N, L, S)` where N is the batch size,
          L is the target sequence length, S is the source sequence length.
    """

    tgt_len, bsz, embed_dim = query.size()
    assert embed_dim == embed_dim_to_check
    assert key.size() == value.size()

    head_dim = embed_dim // num_heads
    assert head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads"
    scaling = float(head_dim) ** -0.5

    if not use_separate_proj_weight:
        if torch.equal(query, key) and torch.equal(key, value):
            # self-attention
            q, k, v = F.linear(query, in_proj_weight, in_proj_bias).chunk(3, dim=-1)

        elif torch.equal(key, value):
            # encoder-decoder attention
            # This is inline in_proj function with in_proj_weight and in_proj_bias
            _b = in_proj_bias
            _start = 0
            _end = embed_dim
            _w = in_proj_weight[_start:_end, :]
            if _b is not None:
                _b = _b[_start:_end]
            q = F.linear(query, _w, _b)

            if key is None:
                assert value is None
                k = None
                v = None
            else:

                # This is inline in_proj function with in_proj_weight and in_proj_bias
                _b = in_proj_bias
                _start = embed_dim
                _end = None
                _w = in_proj_weight[_start:, :]
                if _b is not None:
                    _b = _b[_start:]
                k, v = F.linear(key, _w, _b).chunk(2, dim=-1)

        else:
            # This is inline in_proj function with in_proj_weight and in_proj_bias
            _b = in_proj_bias
            _start = 0
            _end = embed_dim
            _w = in_proj_weight[_start:_end, :]
            if _b is not None:
                _b = _b[_start:_end]
            q = F.linear(query, _w, _b)

            # This is inline in_proj function with in_proj_weight and in_proj_bias
            _b = in_proj_bias
            _start = embed_dim
            _end = embed_dim * 2
            _w = in_proj_weight[_start:_end, :]
            if _b is not None:
                _b = _b[_start:_end]
            k = F.linear(key, _w, _b)

            # This is inline in_proj function with in_proj_weight and in_proj_bias
            _b = in_proj_bias
            _start = embed_dim * 2
            _end = None
            _w = in_proj_weight[_start:, :]
            if _b is not None:
                _b = _b[_start:]
            v = F.linear(value, _w, _b)
    else:
        q_proj_weight_non_opt = torch.jit._unwrap_optional(q_proj_weight)
        len1, len2 = q_proj_weight_non_opt.size()
        assert len1 == embed_dim and len2 == query.size(-1)

        k_proj_weight_non_opt = torch.jit._unwrap_optional(k_proj_weight)
        len1, len2 = k_proj_weight_non_opt.size()
        assert len1 == embed_dim and len2 == key.size(-1)

        v_proj_weight_non_opt = torch.jit._unwrap_optional(v_proj_weight)
        len1, len2 = v_proj_weight_non_opt.size()
        assert len1 == embed_dim and len2 == value.size(-1)

        if in_proj_bias is not None:
            q = F.linear(query, q_proj_weight_non_opt, in_proj_bias[0:embed_dim])
            k = F.linear(key, k_proj_weight_non_opt, in_proj_bias[embed_dim:(embed_dim * 2)])
            v = F.linear(value, v_proj_weight_non_opt, in_proj_bias[(embed_dim * 2):])
        else:
            q = F.linear(query, q_proj_weight_non_opt, in_proj_bias)
            k = F.linear(key, k_proj_weight_non_opt, in_proj_bias)
            v = F.linear(value, v_proj_weight_non_opt, in_proj_bias)
    q = q * scaling

    if attn_mask is not None:
        if attn_mask.dim() == 2:
            attn_mask = attn_mask.unsqueeze(0)
            if list(attn_mask.size()) != [1, query.size(0), key.size(0)]:
                raise RuntimeError('The size of the 2D attn_mask is not correct.')
        elif attn_mask.dim() == 3:
            if list(attn_mask.size()) != [bsz * num_heads, query.size(0), key.size(0)]:
                raise RuntimeError('The size of the 3D attn_mask is not correct.')
        else:
            raise RuntimeError("attn_mask's dimension {} is not supported".format(attn_mask.dim()))
        # attn_mask's dim is 3 now.

    if bias_k is not None and bias_v is not None:
        if static_k is None and static_v is None:
            k = torch.cat([k, bias_k.repeat(1, bsz, 1)])
            v = torch.cat([v, bias_v.repeat(1, bsz, 1)])
            if attn_mask is not None:
                attn_mask = F.pad(attn_mask, (0, 1))
            if key_padding_mask is not None:
                key_padding_mask = F.pad(key_padding_mask, (0, 1))
        else:
            assert static_k is None, "bias cannot be added to static key."
            assert static_v is None, "bias cannot be added to static value."
    else:
        assert bias_k is None
        assert bias_v is None

    q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1)
    if k is not None:
        k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)
    if v is not None:
        v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)

    if static_k is not None:
        assert static_k.size(0) == bsz * num_heads
        assert static_k.size(2) == head_dim
        k = static_k

    if static_v is not None:
        assert static_v.size(0) == bsz * num_heads
        assert static_v.size(2) == head_dim
        v = static_v

    src_len = k.size(1)

    if key_padding_mask is not None:
        assert key_padding_mask.size(0) == bsz
        assert key_padding_mask.size(1) == src_len

    if add_zero_attn:
        src_len += 1
        k = torch.cat([k, torch.zeros((k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device)], dim=1)
        v = torch.cat([v, torch.zeros((v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device)], dim=1)
        if attn_mask is not None:
            attn_mask = F.pad(attn_mask, (0, 1))
        if key_padding_mask is not None:
            key_padding_mask = F.pad(key_padding_mask, (0, 1))

    attn_output_weights = torch.bmm(q, k.transpose(1, 2))
    assert list(attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len]

    if attn_mask is not None:
        attn_output_weights += attn_mask

    if key_padding_mask is not None:
        attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)
        attn_output_weights = attn_output_weights.masked_fill(
            key_padding_mask.unsqueeze(1).unsqueeze(2),
            float('-inf'),
        )
        attn_output_weights = attn_output_weights.view(bsz * num_heads, tgt_len, src_len)

    attn_output_weights = F.softmax(attn_output_weights, dim=-1)
    attn_output_weights = F.dropout(attn_output_weights, p=dropout_p, training=training)

    attn_output = torch.bmm(attn_output_weights, v)
    assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim]
    attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
    attn_output = F.linear(attn_output, out_proj_weight, out_proj_bias)

    if need_weights:
        # average attention weights over heads
        attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)
        return attn_output, attn_output_weights.sum(dim=1) / num_heads
    else:
        return attn_output, None
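
The functional form can also be called directly with explicit projection weights; a sketch with randomly initialized (untrained) parameters, just to confirm the shapes:

import torch
from deepsvg.model.layers.functional import multi_head_attention_forward

L, S, N, E, H = 4, 6, 2, 16, 4             # target len, source len, batch, embed dim, heads
query, key, value = torch.randn(L, N, E), torch.randn(S, N, E), torch.randn(S, N, E)

in_proj_weight = torch.randn(3 * E, E)     # stacked q/k/v projections
in_proj_bias = torch.zeros(3 * E)
out_proj_weight, out_proj_bias = torch.randn(E, E), torch.zeros(E)

attn_output, attn_weights = multi_head_attention_forward(
    query, key, value, E, H,
    in_proj_weight, in_proj_bias,
    None, None, False,                     # bias_k, bias_v, add_zero_attn
    0.0, out_proj_weight, out_proj_bias,   # dropout_p, output projection
    training=False)

print(attn_output.shape)                   # torch.Size([4, 2, 16])
print(attn_weights.shape)                  # torch.Size([2, 4, 6])
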
deepsvg/model/layers/improved_transformer.py
ADDED
@@ -0,0 +1,141 @@
import torch
import copy

from torch.nn import functional as F
from torch.nn.modules.module import Module
from torch.nn.modules.container import ModuleList
from torch.nn.init import xavier_uniform_
from torch.nn.modules.dropout import Dropout
from torch.nn.modules.linear import Linear
from torch.nn.modules.normalization import LayerNorm

from .attention import MultiheadAttention
from .transformer import _get_activation_fn


class TransformerEncoderLayerImproved(Module):
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu", d_global2=None):
        super(TransformerEncoderLayerImproved, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        if d_global2 is not None:
            self.linear_global2 = Linear(d_global2, d_model)

        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2_2 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super(TransformerEncoderLayerImproved, self).__setstate__(state)

    def forward(self, src, memory2=None, src_mask=None, src_key_padding_mask=None):
        src1 = self.norm1(src)
        src2 = self.self_attn(src1, src1, src1, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
        src = src + self.dropout1(src2)

        if memory2 is not None:
            src2_2 = self.linear_global2(memory2)
            src = src + self.dropout2_2(src2_2)

        src1 = self.norm2(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src1))))
        src = src + self.dropout2(src2)
        return src


class TransformerDecoderLayerImproved(Module):
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"):
        super(TransformerDecoderLayerImproved, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.norm3 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super(TransformerDecoderLayerImproved, self).__setstate__(state)

    def forward(self, tgt, memory, tgt_mask=None, memory_mask=None,
                tgt_key_padding_mask=None, memory_key_padding_mask=None):
        tgt1 = self.norm1(tgt)
        tgt2 = self.self_attn(tgt1, tgt1, tgt1, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout1(tgt2)

        tgt1 = self.norm2(tgt)
        tgt2 = self.multihead_attn(tgt1, memory, memory, attn_mask=memory_mask, key_padding_mask=memory_key_padding_mask)[0]
        tgt = tgt + self.dropout2(tgt2)

        tgt1 = self.norm3(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt1))))
        tgt = tgt + self.dropout3(tgt2)
        return tgt


class TransformerDecoderLayerGlobalImproved(Module):
    def __init__(self, d_model, d_global, nhead, dim_feedforward=2048, dropout=0.1, activation="relu", d_global2=None):
        super(TransformerDecoderLayerGlobalImproved, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        self.linear_global = Linear(d_global, d_model)

        if d_global2 is not None:
            self.linear_global2 = Linear(d_global2, d_model)

        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout2_2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
def __setstate__(self, state):
|
122 |
+
if 'activation' not in state:
|
123 |
+
state['activation'] = F.relu
|
124 |
+
super(TransformerDecoderLayerGlobalImproved, self).__setstate__(state)
|
125 |
+
|
126 |
+
def forward(self, tgt, memory, memory2=None, tgt_mask=None, tgt_key_padding_mask=None, *args, **kwargs):
|
127 |
+
tgt1 = self.norm1(tgt)
|
128 |
+
tgt2 = self.self_attn(tgt1, tgt1, tgt1, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask)[0]
|
129 |
+
tgt = tgt + self.dropout1(tgt2)
|
130 |
+
|
131 |
+
tgt2 = self.linear_global(memory)
|
132 |
+
tgt = tgt + self.dropout2(tgt2) # implicit broadcast
|
133 |
+
|
134 |
+
if memory2 is not None:
|
135 |
+
tgt2_2 = self.linear_global2(memory2)
|
136 |
+
tgt = tgt + self.dropout2_2(tgt2_2)
|
137 |
+
|
138 |
+
tgt1 = self.norm2(tgt)
|
139 |
+
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt1))))
|
140 |
+
tgt = tgt + self.dropout3(tgt2)
|
141 |
+
return tgt
|
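Unlike the stock layers in transformer.py below (post-norm), these Improved variants normalize before attention (pre-norm) and can inject a global conditioning vector through linear_global. A quick smoke test of the globally conditioned decoder layer, assuming the custom MultiheadAttention in .attention keeps the stock PyTorch call signature (which the forward above relies on):

import torch
from deepsvg.model.layers.improved_transformer import TransformerDecoderLayerGlobalImproved

layer = TransformerDecoderLayerGlobalImproved(d_model=256, d_global=64, nhead=8, dim_feedforward=512)

tgt = torch.randn(31, 4, 256)   # (seq_len, batch, d_model)
z = torch.randn(1, 4, 64)       # one global latent per sample
out = layer(tgt, z)             # z is projected by linear_global and broadcast-added
print(out.shape)                # torch.Size([31, 4, 256])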
deepsvg/model/layers/positional_encoding.py
ADDED
@@ -0,0 +1,43 @@
import math
import torch
import torch.nn as nn


class PositionalEncodingSinCos(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=250):
        super(PositionalEncodingSinCos, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)


class PositionalEncodingLUT(nn.Module):

    def __init__(self, d_model, dropout=0.1, max_len=250):
        super(PositionalEncodingLUT, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len, dtype=torch.long).unsqueeze(1)
        self.register_buffer('position', position)

        self.pos_embed = nn.Embedding(max_len, d_model)

        self._init_embeddings()

    def _init_embeddings(self):
        nn.init.kaiming_normal_(self.pos_embed.weight, mode="fan_in")

    def forward(self, x):
        pos = self.position[:x.size(0)]
        x = x + self.pos_embed(pos)
        return self.dropout(x)
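Both modules add a position-dependent vector to a sequence-first tensor; SinCos uses fixed sinusoids, while the LUT variant learns an embedding table initialized with Kaiming normal. A shape check:

import torch
from deepsvg.model.layers.positional_encoding import PositionalEncodingSinCos, PositionalEncodingLUT

x = torch.zeros(30, 4, 256)   # (seq_len, batch, d_model), sequence-first

pe_fixed = PositionalEncodingSinCos(d_model=256, dropout=0.0)
pe_learned = PositionalEncodingLUT(d_model=256, dropout=0.0)

print(pe_fixed(x).shape)      # torch.Size([30, 4, 256])
print(pe_learned(x).shape)    # torch.Size([30, 4, 256])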
deepsvg/model/layers/transformer.py
ADDED
@@ -0,0 +1,393 @@
import torch
import copy

from torch.nn import functional as F
from torch.nn.modules.module import Module
from torch.nn.modules.container import ModuleList
from torch.nn.init import xavier_uniform_
from torch.nn.modules.dropout import Dropout
from torch.nn.modules.linear import Linear
from torch.nn.modules.normalization import LayerNorm

from .attention import MultiheadAttention


class Transformer(Module):
    r"""A transformer model. User is able to modify the attributes as needed. The architecture
    is based on the paper "Attention Is All You Need". Ashish Vaswani, Noam Shazeer,
    Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and
    Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information
    Processing Systems, pages 6000-6010. Users can build the BERT(https://arxiv.org/abs/1810.04805)
    model with corresponding parameters.

    Args:
        d_model: the number of expected features in the encoder/decoder inputs (default=512).
        nhead: the number of heads in the multiheadattention models (default=8).
        num_encoder_layers: the number of sub-encoder-layers in the encoder (default=6).
        num_decoder_layers: the number of sub-decoder-layers in the decoder (default=6).
        dim_feedforward: the dimension of the feedforward network model (default=2048).
        dropout: the dropout value (default=0.1).
        activation: the activation function of encoder/decoder intermediate layer, relu or gelu (default=relu).
        custom_encoder: custom encoder (default=None).
        custom_decoder: custom decoder (default=None).

    Examples::
        >>> transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
        >>> src = torch.rand((10, 32, 512))
        >>> tgt = torch.rand((20, 32, 512))
        >>> out = transformer_model(src, tgt)

    Note: A full example to apply nn.Transformer module for the word language model is available in
    https://github.com/pytorch/examples/tree/master/word_language_model
    """

    def __init__(self, d_model=512, nhead=8, num_encoder_layers=6,
                 num_decoder_layers=6, dim_feedforward=2048, dropout=0.1,
                 activation="relu", custom_encoder=None, custom_decoder=None):
        super(Transformer, self).__init__()

        if custom_encoder is not None:
            self.encoder = custom_encoder
        else:
            encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation)
            encoder_norm = LayerNorm(d_model)
            self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        if custom_decoder is not None:
            self.decoder = custom_decoder
        else:
            decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, activation)
            decoder_norm = LayerNorm(d_model)
            self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead

    def forward(self, src, tgt, src_mask=None, tgt_mask=None,
                memory_mask=None, src_key_padding_mask=None,
                tgt_key_padding_mask=None, memory_key_padding_mask=None):
        # type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor  # noqa
        r"""Take in and process masked source/target sequences.

        Args:
            src: the sequence to the encoder (required).
            tgt: the sequence to the decoder (required).
            src_mask: the additive mask for the src sequence (optional).
            tgt_mask: the additive mask for the tgt sequence (optional).
            memory_mask: the additive mask for the encoder output (optional).
            src_key_padding_mask: the ByteTensor mask for src keys per batch (optional).
            tgt_key_padding_mask: the ByteTensor mask for tgt keys per batch (optional).
            memory_key_padding_mask: the ByteTensor mask for memory keys per batch (optional).

        Shape:
            - src: :math:`(S, N, E)`.
            - tgt: :math:`(T, N, E)`.
            - src_mask: :math:`(S, S)`.
            - tgt_mask: :math:`(T, T)`.
            - memory_mask: :math:`(T, S)`.
            - src_key_padding_mask: :math:`(N, S)`.
            - tgt_key_padding_mask: :math:`(N, T)`.
            - memory_key_padding_mask: :math:`(N, S)`.

            Note: [src/tgt/memory]_mask should be filled with
            float('-inf') for the masked positions and float(0.0) else. These masks
            ensure that predictions for position i depend only on the unmasked positions
            j and are applied identically for each sequence in a batch.
            [src/tgt/memory]_key_padding_mask should be a ByteTensor where True values are positions
            that should be masked with float('-inf') and False values will be unchanged.
            This mask ensures that no information will be taken from position i if
            it is masked, and has a separate mask for each sequence in a batch.

            - output: :math:`(T, N, E)`.

            Note: Due to the multi-head attention architecture in the transformer model,
            the output sequence length of a transformer is the same as the input sequence
            (i.e. target) length of the decoder.

            where S is the source sequence length, T is the target sequence length, N is the
            batch size, E is the feature number

        Examples:
            >>> output = transformer_model(src, tgt, src_mask=src_mask, tgt_mask=tgt_mask)
        """

        if src.size(1) != tgt.size(1):
            raise RuntimeError("the batch number of src and tgt must be equal")

        if src.size(2) != self.d_model or tgt.size(2) != self.d_model:
            raise RuntimeError("the feature number of src and tgt must be equal to d_model")

        memory = self.encoder(src, mask=src_mask, src_key_padding_mask=src_key_padding_mask)
        output = self.decoder(tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask,
                              tgt_key_padding_mask=tgt_key_padding_mask,
                              memory_key_padding_mask=memory_key_padding_mask)
        return output

    def generate_square_subsequent_mask(self, sz):
        r"""Generate a square mask for the sequence. The masked positions are filled with float('-inf').
        Unmasked positions are filled with float(0.0).
        """
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def _reset_parameters(self):
        r"""Initiate parameters in the transformer model."""

        for p in self.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)


class TransformerEncoder(Module):
    r"""TransformerEncoder is a stack of N encoder layers

    Args:
        encoder_layer: an instance of the TransformerEncoderLayer() class (required).
        num_layers: the number of sub-encoder-layers in the encoder (required).
        norm: the layer normalization component (optional).

    Examples::
        >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
        >>> transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
        >>> src = torch.rand(10, 32, 512)
        >>> out = transformer_encoder(src)
    """
    __constants__ = ['norm']

    def __init__(self, encoder_layer, num_layers, norm=None):
        super(TransformerEncoder, self).__init__()
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def forward(self, src, memory2=None, mask=None, src_key_padding_mask=None):
        # type: (Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor
        r"""Pass the input through the encoder layers in turn.

        Args:
            src: the sequence to the encoder (required).
            mask: the mask for the src sequence (optional).
            src_key_padding_mask: the mask for the src keys per batch (optional).

        Shape:
            see the docs in Transformer class.
        """
        output = src

        for mod in self.layers:
            output = mod(output, memory2=memory2, src_mask=mask, src_key_padding_mask=src_key_padding_mask)

        if self.norm is not None:
            output = self.norm(output)

        return output


class TransformerDecoder(Module):
    r"""TransformerDecoder is a stack of N decoder layers

    Args:
        decoder_layer: an instance of the TransformerDecoderLayer() class (required).
        num_layers: the number of sub-decoder-layers in the decoder (required).
        norm: the layer normalization component (optional).

    Examples::
        >>> decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
        >>> transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
        >>> memory = torch.rand(10, 32, 512)
        >>> tgt = torch.rand(20, 32, 512)
        >>> out = transformer_decoder(tgt, memory)
    """
    __constants__ = ['norm']

    def __init__(self, decoder_layer, num_layers, norm=None):
        super(TransformerDecoder, self).__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def forward(self, tgt, memory, memory2=None, tgt_mask=None,
                memory_mask=None, tgt_key_padding_mask=None,
                memory_key_padding_mask=None):
        # type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor
        r"""Pass the inputs (and mask) through the decoder layer in turn.

        Args:
            tgt: the sequence to the decoder (required).
            memory: the sequence from the last layer of the encoder (required).
            tgt_mask: the mask for the tgt sequence (optional).
            memory_mask: the mask for the memory sequence (optional).
            tgt_key_padding_mask: the mask for the tgt keys per batch (optional).
            memory_key_padding_mask: the mask for the memory keys per batch (optional).

        Shape:
            see the docs in Transformer class.
        """
        output = tgt

        for mod in self.layers:
            output = mod(output, memory, memory2=memory2, tgt_mask=tgt_mask,
                         memory_mask=memory_mask,
                         tgt_key_padding_mask=tgt_key_padding_mask,
                         memory_key_padding_mask=memory_key_padding_mask)

        if self.norm is not None:
            output = self.norm(output)

        return output


class TransformerEncoderLayer(Module):
    r"""TransformerEncoderLayer is made up of self-attn and feedforward network.
    This standard encoder layer is based on the paper "Attention Is All You Need".
    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
    Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in
    Neural Information Processing Systems, pages 6000-6010. Users may modify or implement
    in a different way during application.

    Args:
        d_model: the number of expected features in the input (required).
        nhead: the number of heads in the multiheadattention models (required).
        dim_feedforward: the dimension of the feedforward network model (default=2048).
        dropout: the dropout value (default=0.1).
        activation: the activation function of intermediate layer, relu or gelu (default=relu).

    Examples::
        >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
        >>> src = torch.rand(10, 32, 512)
        >>> out = encoder_layer(src)
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super(TransformerEncoderLayer, self).__setstate__(state)

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        # type: (Tensor, Optional[Tensor], Optional[Tensor]) -> Tensor
        r"""Pass the input through the encoder layer.

        Args:
            src: the sequence to the encoder layer (required).
            src_mask: the mask for the src sequence (optional).
            src_key_padding_mask: the mask for the src keys per batch (optional).

        Shape:
            see the docs in Transformer class.
        """
        src2 = self.self_attn(src, src, src, attn_mask=src_mask,
                              key_padding_mask=src_key_padding_mask)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src


class TransformerDecoderLayer(Module):
    r"""TransformerDecoderLayer is made up of self-attn, multi-head-attn and feedforward network.
    This standard decoder layer is based on the paper "Attention Is All You Need".
    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
    Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in
    Neural Information Processing Systems, pages 6000-6010. Users may modify or implement
    in a different way during application.

    Args:
        d_model: the number of expected features in the input (required).
        nhead: the number of heads in the multiheadattention models (required).
        dim_feedforward: the dimension of the feedforward network model (default=2048).
        dropout: the dropout value (default=0.1).
        activation: the activation function of intermediate layer, relu or gelu (default=relu).

    Examples::
        >>> decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
        >>> memory = torch.rand(10, 32, 512)
        >>> tgt = torch.rand(20, 32, 512)
        >>> out = decoder_layer(tgt, memory)
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"):
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.norm3 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super(TransformerDecoderLayer, self).__setstate__(state)

    def forward(self, tgt, memory, tgt_mask=None, memory_mask=None,
                tgt_key_padding_mask=None, memory_key_padding_mask=None):
        # type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor
        r"""Pass the inputs (and mask) through the decoder layer.

        Args:
            tgt: the sequence to the decoder layer (required).
            memory: the sequence from the last layer of the encoder (required).
            tgt_mask: the mask for the tgt sequence (optional).
            memory_mask: the mask for the memory sequence (optional).
            tgt_key_padding_mask: the mask for the tgt keys per batch (optional).
            memory_key_padding_mask: the mask for the memory keys per batch (optional).

        Shape:
            see the docs in Transformer class.
        """
        tgt2 = self.self_attn(tgt, tgt, tgt, attn_mask=tgt_mask,
                              key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout1(tgt2)
        tgt = self.norm1(tgt)
        tgt2 = self.multihead_attn(tgt, memory, memory, attn_mask=memory_mask,
                                   key_padding_mask=memory_key_padding_mask)[0]
        tgt = tgt + self.dropout2(tgt2)
        tgt = self.norm2(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = tgt + self.dropout3(tgt2)
        tgt = self.norm3(tgt)
        return tgt


def _get_clones(module, N):
    return ModuleList([copy.deepcopy(module) for i in range(N)])


def _get_activation_fn(activation):
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu

    raise RuntimeError("activation should be relu/gelu, not {}".format(activation))
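The additive mask convention documented above can be checked in isolation: adding a causal mask to the raw scores zeroes out attention to future positions after the softmax.

import torch
import torch.nn.functional as F

sz = 4
mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, 0.0)

scores = torch.randn(sz, sz)
weights = F.softmax(scores + mask, dim=-1)
print(weights)   # entries above the diagonal (future positions) are exactly zero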
deepsvg/model/layers/utils.py
ADDED
@@ -0,0 +1,36 @@
import torch


def to_negative_mask(mask):
    if mask is None:
        return

    mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask


def generate_square_subsequent_mask(sz):
    mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
    mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask


def generate_adj_subsequent_mask(sz):
    mask = torch.diag(torch.ones(sz), diagonal=0) + torch.diag(torch.ones(sz-1), diagonal=-1)

    if sz >= 2:
        mask = mask + torch.diag(torch.ones(sz-2), diagonal=-2)

    return to_negative_mask(mask)


def generate_adj_mask(sz):
    mask = torch.diag(torch.ones(sz), diagonal=0) +\
           torch.diag(torch.ones(sz - 1), diagonal=+1) +\
           torch.diag(torch.ones(sz - 1), diagonal=-1)

    if sz >= 2:
        mask = mask + torch.diag(torch.ones(sz - 2), diagonal=-2) +\
               torch.diag(torch.ones(sz - 2), diagonal=+2)

    return to_negative_mask(mask)
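generate_adj_subsequent_mask lets each position attend only to itself and the two preceding positions; generate_adj_mask is its symmetric counterpart (a band of width two around the diagonal). Printing small instances makes the band structure visible:

import torch
from deepsvg.model.layers.utils import generate_adj_subsequent_mask, generate_adj_mask

print(generate_adj_subsequent_mask(5))
# 0.0 on the main diagonal and the two diagonals below it, -inf everywhere else

print(generate_adj_mask(5))
# 0.0 on a symmetric band of width two around the diagonal, -inf elsewhere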
deepsvg/model/loss.py
ADDED
@@ -0,0 +1,104 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from deepsvg.difflib.tensor import SVGTensor
from .utils import _get_padding_mask, _get_visibility_mask
from .config import _DefaultConfig


class SVGLoss(nn.Module):
    def __init__(self, cfg: _DefaultConfig):
        super().__init__()

        self.cfg = cfg

        self.args_dim = 2 * cfg.args_dim if cfg.rel_targets else cfg.args_dim + 1

        self.register_buffer("cmd_args_mask", SVGTensor.CMD_ARGS_MASK)

    def forward(self, output, labels, weights):
        loss = 0.
        res = {}

        # VAE
        if self.cfg.use_vae:
            mu, logsigma = output["mu"], output["logsigma"]
            loss_kl = -0.5 * torch.mean(1 + logsigma - mu.pow(2) - torch.exp(logsigma))
            loss_kl = loss_kl.clamp(min=weights["kl_tolerance"])

            loss += weights["loss_kl_weight"] * loss_kl
            res["loss_kl"] = loss_kl

        # remove commitment loss
        # if self.cfg.use_vqvae:
        #     vqvae_loss = output["vqvae_loss"].mean()
        #     loss += vqvae_loss
        #     res["vqvae_loss"] = vqvae_loss

        # Target & predictions
        # tgt_commands.shape [batch_size, max_num_groups, max_seq_len + 2]
        # tgt_args.shape     [batch_size, max_num_groups, max_seq_len + 2, n_args]
        tgt_commands, tgt_args = output["tgt_commands"], output["tgt_args"]

        visibility_mask = _get_visibility_mask(tgt_commands, seq_dim=-1)
        padding_mask = _get_padding_mask(tgt_commands, seq_dim=-1, extended=True) * visibility_mask.unsqueeze(-1)

        command_logits, args_logits = output["command_logits"], output["args_logits"]

        # 2-stage visibility
        if self.cfg.decode_stages == 2:
            visibility_logits = output["visibility_logits"]
            loss_visibility = F.cross_entropy(visibility_logits.reshape(-1, 2), visibility_mask.reshape(-1).long())

            loss += weights["loss_visibility_weight"] * loss_visibility
            res["loss_visibility"] = loss_visibility

        # Commands & args
        if self.cfg.bin_targets:  # with bin_targets, each coordinate is represented by 8 bits, hence one extra dimension
            tgt_args = tgt_args[..., 1:, :, :]
        else:
            tgt_args = tgt_args[..., 1:, :]
        tgt_commands, padding_mask = tgt_commands[..., 1:], padding_mask[..., 1:]

        # mask.shape [batch_size, 8, 31, 11]
        # For a correctly predicted command, mask is multiplied by True, so the cmd_args_mask vector stays unchanged.
        # For a wrongly predicted command, mask is multiplied by False, zeroing cmd_args_mask, i.e. the corresponding args are not counted.
        # pred_cmd = torch.argmax(command_logits, dim=-1)
        # mask = self.cmd_args_mask[tgt_commands.long()] * (pred_cmd == tgt_commands).unsqueeze(-1)

        mask = self.cmd_args_mask[tgt_commands.long()]

        # padding_mask.shape   [batch_size, num_path, num_commands + 1]
        # command_logits.shape [batch_size, num_path, num_commands + 1, n_commands]
        # command_logits[padding_mask.bool()].shape [-1, n_commands]
        # The goal is to filter out the PAD positions.
        loss_cmd = F.cross_entropy(command_logits[padding_mask.bool()].reshape(-1, self.cfg.n_commands), tgt_commands[padding_mask.bool()].reshape(-1).long())

        if self.cfg.abs_targets:
            # l2 loss performs better than l1 loss
            loss_args = nn.MSELoss()(
                args_logits[mask.bool()].reshape(-1),
                tgt_args[mask.bool()].reshape(-1).float()
            )
        elif self.cfg.bin_targets:
            loss_args = nn.MSELoss()(
                args_logits[mask.bool()].reshape(-1),
                tgt_args[mask.bool()].reshape(-1).float()
            )
        else:
            loss_args = F.cross_entropy(
                args_logits[mask.bool()].reshape(-1, self.args_dim),
                tgt_args[mask.bool()].reshape(-1).long() + 1
            )  # shift due to -1 PAD_VAL

        loss += weights["loss_cmd_weight"] * loss_cmd \
                + weights["loss_args_weight"] * loss_args

        res.update({
            "loss": loss,
            "loss_cmd": loss_cmd,
            "loss_args": loss_args
        })

        return res
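The gather self.cmd_args_mask[tgt_commands.long()] turns the per-command-type argument-validity table into a per-position mask, so only the arguments a command actually uses contribute to loss_args. A toy sketch of the same indexing (the 3-command, 4-argument table below is made up for illustration):

import torch

# hypothetical validity table: command type -> which of 4 args it uses
cmd_args_mask = torch.tensor([[1., 1., 0., 0.],   # e.g. a "move" uses args 0-1
                              [1., 1., 1., 1.],   # a "curve" uses all four
                              [0., 0., 0., 0.]])  # EOS/PAD uses none

tgt_commands = torch.tensor([[0, 1, 2]])          # (batch, seq)
mask = cmd_args_mask[tgt_commands]                # (batch, seq, 4)
print(mask.shape)  # torch.Size([1, 3, 4])

per_arg_loss = torch.rand(1, 3, 4)
loss_args = per_arg_loss[mask.bool()].mean()      # only valid (command, arg) slots counted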
deepsvg/model/model.py
ADDED
@@ -0,0 +1,690 @@
import torch
import torch.nn as nn
import torch.nn.functional as F  # explicit imports; previously pulled in only via the star imports below

from deepsvg.difflib.tensor import SVGTensor
from deepsvg.utils.utils import _pack_group_batch, _unpack_group_batch, _make_seq_first, _make_batch_first, eval_decorator
from deepsvg.utils import bit2int

from .layers.transformer import *
from .layers.improved_transformer import *
from .layers.positional_encoding import *
from .vector_quantize_pytorch import VectorQuantize
from .basic_blocks import FCN, HierarchFCN, ResNet, ArgumentFCN
from .config import _DefaultConfig
from .utils import (_get_padding_mask, _get_key_padding_mask, _get_group_mask, _get_visibility_mask,
                    _get_key_visibility_mask, _generate_square_subsequent_mask, _sample_categorical, _threshold_sample)

from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
from scipy.optimize import linear_sum_assignment
from einops import rearrange

from random import randint


class SVGEmbedding(nn.Module):
    def __init__(self, cfg: _DefaultConfig, seq_len, use_group=True, group_len=None):
        super().__init__()

        self.cfg = cfg

        # command embedding
        self.command_embed = nn.Embedding(cfg.n_commands, cfg.d_model)  # (7, 256)
        self.embed_fcn = nn.Linear(cfg.n_args, cfg.d_model)

        self.use_group = use_group
        if use_group:
            if group_len is None:
                group_len = cfg.max_num_groups
            self.group_embed = nn.Embedding(group_len+2, cfg.d_model)

        self.pos_encoding = PositionalEncodingLUT(cfg.d_model, max_len=seq_len+2, dropout=cfg.dropout)

        self.register_buffer("cmd_args_mask", SVGTensor.CMD_ARGS_MASK)

        self._init_embeddings()

    def _init_embeddings(self):
        nn.init.kaiming_normal_(self.command_embed.weight, mode="fan_in")
        nn.init.kaiming_normal_(self.embed_fcn.weight, mode="fan_in")

        # if not self.cfg.bin_targets:
        #     nn.init.kaiming_normal_(self.arg_embed.weight, mode="fan_in")

        if self.use_group:
            nn.init.kaiming_normal_(self.group_embed.weight, mode="fan_in")

    def forward(self, commands, args, groups=None):
        # commands.shape (32, 960) = (max_seq_len + 2, max_num_groups * batch_size)
        S, GN = commands.shape

        src = self.command_embed(commands.long()) + self.embed_fcn(args)

        if self.use_group:
            src = src + self.group_embed(groups.long())

        src = self.pos_encoding(src)
        return src

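SVGEmbedding sums three signals, a command-type embedding, a linear projection of the (already numeric) argument vector, and optionally a group embedding, before adding the positional LUT. The shape arithmetic, with sizes taken from the comment above:

import torch
import torch.nn as nn

S, GN, n_args, d_model, n_commands = 32, 960, 11, 256, 7

commands = torch.randint(0, n_commands, (S, GN))
args = torch.randn(S, GN, n_args)

command_embed = nn.Embedding(n_commands, d_model)
embed_fcn = nn.Linear(n_args, d_model)

src = command_embed(commands) + embed_fcn(args)
print(src.shape)  # torch.Size([32, 960, 256]) -- one d_model vector per token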
class ConstEmbedding(nn.Module):
    def __init__(self, cfg: _DefaultConfig, seq_len):
        super().__init__()

        self.cfg = cfg

        self.seq_len = seq_len

        self.PE = PositionalEncodingLUT(cfg.d_model, max_len=seq_len, dropout=cfg.dropout)

    def forward(self, z):
        N = z.size(1)
        src = self.PE(z.new_zeros(self.seq_len, N, self.cfg.d_model))
        return src


class LabelEmbedding(nn.Module):
    def __init__(self, cfg: _DefaultConfig):
        super().__init__()

        self.label_embedding = nn.Embedding(cfg.n_labels, cfg.dim_label)

        self._init_embeddings()

    def _init_embeddings(self):
        nn.init.kaiming_normal_(self.label_embedding.weight, mode="fan_in")

    def forward(self, label):
        src = self.label_embedding(label)
        return src


class Encoder(nn.Module):
    def __init__(self, cfg: _DefaultConfig):
        super().__init__()

        self.cfg = cfg

        seq_len = cfg.max_seq_len if cfg.encode_stages == 2 else cfg.max_total_len
        self.use_group = cfg.encode_stages == 1
        self.embedding = SVGEmbedding(cfg, seq_len, use_group=self.use_group)

        if cfg.label_condition:
            self.label_embedding = LabelEmbedding(cfg)
        dim_label = cfg.dim_label if cfg.label_condition else None

        if cfg.model_type == "transformer":
            encoder_layer = TransformerEncoderLayerImproved(cfg.d_model, cfg.n_heads, cfg.dim_feedforward, cfg.dropout, d_global2=dim_label)
            encoder_norm = LayerNorm(cfg.d_model)
            self.encoder = TransformerEncoder(encoder_layer, cfg.n_layers, encoder_norm)

        else:  # "lstm"
            self.encoder = nn.LSTM(cfg.d_model, cfg.d_model // 2, dropout=cfg.dropout, bidirectional=True)

        if cfg.encode_stages == 2:
            if not cfg.self_match:
                self.hierarchical_PE = PositionalEncodingLUT(cfg.d_model, max_len=cfg.max_num_groups)

            # hierarchical_encoder_layer = TransformerEncoderLayerImproved(cfg.d_model, cfg.n_heads, cfg.dim_feedforward, cfg.dropout, d_global2=dim_label)
            # hierarchical_encoder_norm = LayerNorm(cfg.d_model)
            # self.hierarchical_encoder = TransformerEncoder(hierarchical_encoder_layer, cfg.n_layers, hierarchical_encoder_norm)

    def forward(self, commands, args, label=None):
        # inputs are sequence-first here (see _make_seq_first in SVGTransformer.forward):
        # commands.shape: [max_seq_len + 2, max_num_groups, batch_size]
        # args.shape:     [max_seq_len + 2, max_num_groups, batch_size, n_args]
        S, G, N = commands.shape
        l = self.label_embedding(label).unsqueeze(0).unsqueeze(0).repeat(1, commands.size(1), 1, 1) if self.cfg.label_condition else None

        # if self.cfg.encode_stages == 2:
        #     visibility_mask, key_visibility_mask = _get_visibility_mask(commands, seq_dim=0), _get_key_visibility_mask(commands, seq_dim=0)

        commands, args, l = _pack_group_batch(commands, args, l)
        # commands.shape: [max_seq_len + 2, max_num_groups * batch_size]
        # key_padding_mask lets attention ignore the <PAD> positions
        padding_mask, key_padding_mask = _get_padding_mask(commands, seq_dim=0), _get_key_padding_mask(commands, seq_dim=0)
        group_mask = _get_group_mask(commands, seq_dim=0) if self.use_group else None

        # cmd_src, args_src = self.embedding(commands, args, group_mask)
        src = self.embedding(commands, args, group_mask)

        if self.cfg.model_type == "transformer":
            memory = self.encoder(src, mask=None, src_key_padding_mask=key_padding_mask, memory2=l)
            z = memory * padding_mask  # no averaging over the command sequence
        else:  # "lstm"
            hidden_cell = (src.new_zeros(2, N, self.cfg.d_model // 2),
                           src.new_zeros(2, N, self.cfg.d_model // 2))
            sequence_lengths = padding_mask.sum(dim=0).squeeze(-1)
            x = pack_padded_sequence(src, sequence_lengths, enforce_sorted=False)

            packed_output, _ = self.encoder(x, hidden_cell)

            memory, _ = pad_packed_sequence(packed_output)
            idx = (sequence_lengths - 1).long().view(1, -1, 1).repeat(1, 1, self.cfg.d_model)
            z = memory.gather(dim=0, index=idx)

        # cmd_z, args_z = _unpack_group_batch(N, cmd_z, args_z)
        z = _unpack_group_batch(N, z)

        # Why not use the encode_stages == 1 flag to implement a single encoder?
        # When encode_stages = 1, fetching the data involves a grouping step; we keep
        # the original code logic unchanged as far as possible.
        if self.cfg.one_encoder:
            return z.transpose(0, 1)

        if self.cfg.encode_stages == 2:
            assert False, 'not use E2'
            # src = z.transpose(0, 1)
            # src = _pack_group_batch(src)
            # l = self.label_embedding(label).unsqueeze(0) if self.cfg.label_condition else None

            # if not self.cfg.self_match:
            #     src = self.hierarchical_PE(src)

            # memory = self.hierarchical_encoder(src, mask=None, src_key_padding_mask=key_visibility_mask, memory2=l)

            # if self.cfg.quantize_path:
            #     z = (memory * visibility_mask)
            # else:
            #     z = (memory * visibility_mask).sum(dim=0, keepdim=True) / visibility_mask.sum(dim=0, keepdim=True)
            # z = _unpack_group_batch(N, z)

        return z

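_pack_group_batch (imported from deepsvg.utils.utils, not shown in this diff) folds the group axis into the batch axis so the per-path encoder treats each path as an independent sequence. A rearrange-based sketch of the equivalent reshape, under that assumption:

import torch
from einops import rearrange

S, G, N, d = 32, 8, 4, 256           # seq, groups, batch, width
x = torch.randn(S, G, N, d)

packed = rearrange(x, 's g n d -> s (g n) d')     # groups folded into the batch
unpacked = rearrange(packed, 's (g n) d -> s g n d', n=N)
assert torch.equal(x, unpacked)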
class VAE(nn.Module):
    def __init__(self, cfg: _DefaultConfig):
        super(VAE, self).__init__()

        self.enc_mu_fcn = nn.Linear(cfg.d_model, cfg.dim_z)
        self.enc_sigma_fcn = nn.Linear(cfg.d_model, cfg.dim_z)

        self._init_embeddings()

    def _init_embeddings(self):
        nn.init.normal_(self.enc_mu_fcn.weight, std=0.001)
        nn.init.constant_(self.enc_mu_fcn.bias, 0)
        nn.init.normal_(self.enc_sigma_fcn.weight, std=0.001)
        nn.init.constant_(self.enc_sigma_fcn.bias, 0)

    def forward(self, z):
        mu, logsigma = self.enc_mu_fcn(z), self.enc_sigma_fcn(z)
        sigma = torch.exp(logsigma / 2.)
        z = mu + sigma * torch.randn_like(sigma)

        return z, mu, logsigma

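The VAE head above uses the standard reparameterization trick: it predicts mu and log-variance and samples z = mu + sigma * eps with eps ~ N(0, I), which keeps the sample differentiable with respect to the encoder. In isolation:

import torch

mu = torch.zeros(4, 64, requires_grad=True)
logsigma = torch.zeros(4, 64, requires_grad=True)    # log-variance

sigma = torch.exp(logsigma / 2.)
z = mu + sigma * torch.randn_like(sigma)             # differentiable sample

z.sum().backward()                                   # gradients flow into mu and logsigma
print(mu.grad.shape, logsigma.grad.shape)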
class Bottleneck(nn.Module):
    def __init__(self, cfg: _DefaultConfig):
        super(Bottleneck, self).__init__()

        self.bottleneck = nn.Linear(cfg.d_model, cfg.dim_z)

    def forward(self, z):
        return self.bottleneck(z)


class Decoder(nn.Module):
    def __init__(self, cfg: _DefaultConfig):
        super(Decoder, self).__init__()

        self.cfg = cfg

        if cfg.label_condition:
            self.label_embedding = LabelEmbedding(cfg)
        dim_label = cfg.dim_label if cfg.label_condition else None

        if cfg.decode_stages == 2:
            # self.hierarchical_embedding = ConstEmbedding(cfg, cfg.num_groups_proposal)

            # hierarchical_decoder_layer = TransformerDecoderLayerGlobalImproved(cfg.d_model, cfg.dim_z, cfg.n_heads, cfg.dim_feedforward, cfg.dropout, d_global2=dim_label)
            # hierarchical_decoder_norm = LayerNorm(cfg.d_model)
            # self.hierarchical_decoder = TransformerDecoder(hierarchical_decoder_layer, cfg.n_layers_decode, hierarchical_decoder_norm)
            self.hierarchical_fcn = HierarchFCN(cfg.d_model, cfg.dim_z)

        if cfg.pred_mode == "autoregressive":
            # NOTE: the original call also passed rel_args=cfg.rel_targets, but SVGEmbedding
            # in this file takes no such argument, so the kwarg is dropped here.
            self.embedding = SVGEmbedding(cfg, cfg.max_total_len, use_group=True, group_len=cfg.max_total_len)

            square_subsequent_mask = _generate_square_subsequent_mask(self.cfg.max_total_len+1)
            self.register_buffer("square_subsequent_mask", square_subsequent_mask)
        else:  # "one_shot"
            seq_len = cfg.max_seq_len+1 if cfg.decode_stages == 2 else cfg.max_total_len+1
            self.embedding = ConstEmbedding(cfg, seq_len)
            if cfg.args_decoder:
                self.argument_embedding = ConstEmbedding(cfg, seq_len)

        if cfg.model_type == "transformer":
            decoder_layer = TransformerDecoderLayerGlobalImproved(cfg.d_model, cfg.dim_z, cfg.n_heads, cfg.dim_feedforward, cfg.dropout, d_global2=dim_label)
            decoder_norm = LayerNorm(cfg.d_model)
            self.decoder = TransformerDecoder(decoder_layer, cfg.n_layers_decode, decoder_norm)

        else:  # "lstm"
            self.fc_hc = nn.Linear(cfg.dim_z, 2 * cfg.d_model)
            self.decoder = nn.LSTM(cfg.d_model, cfg.d_model, dropout=cfg.dropout)

        if cfg.rel_targets:
            args_dim = 2 * cfg.args_dim
            if cfg.bin_targets:
                args_dim = 8
        else:
            args_dim = cfg.args_dim + 1

        self.fcn = FCN(cfg.d_model, cfg.n_commands, cfg.n_args, args_dim, cfg.abs_targets)

    def _get_initial_state(self, z):
        hidden, cell = torch.split(torch.tanh(self.fc_hc(z)), self.cfg.d_model, dim=2)
        hidden_cell = hidden.contiguous(), cell.contiguous()
        return hidden_cell

    def forward(self, z, commands, args, label=None, hierarch_logits=None, return_hierarch=False):
        N = z.size(2)
        l = self.label_embedding(label).unsqueeze(0) if self.cfg.label_condition else None
        if hierarch_logits is None:
            # z = _pack_group_batch(z)
            visibility_z = _pack_group_batch(torch.mean(z[:, 1:, ...], dim=1, keepdim=True))  # predicts visibility; the SOS position is removed

        if self.cfg.decode_stages == 2:
            if hierarch_logits is None:
                # src = self.hierarchical_embedding(z)
                # # print('D2 PE src', src.shape)
                # # print('D2 con z', z.shape)
                # out = self.hierarchical_decoder(src, z, tgt_mask=None, tgt_key_padding_mask=None, memory2=l)
                # # print('D2 out', out.shape)
                # hierarch_logits, _z = self.hierarchical_fcn(out)
                # # print('hierarch_logits origin', hierarch_logits.shape)

                # only a linear layer for visibility prediction
                hierarch_logits, _z = self.hierarchical_fcn(visibility_z)

            if self.cfg.label_condition: l = l.unsqueeze(0).repeat(1, z.size(1), 1, 1)

            hierarch_logits, l = _pack_group_batch(hierarch_logits, l)
            if not self.cfg.connect_through:
                z = _pack_group_batch(_z)

        if return_hierarch:
            return _unpack_group_batch(N, hierarch_logits, z)

        if self.cfg.pred_mode == "autoregressive":
            S = commands.size(0)
            commands, args = _pack_group_batch(commands, args)

            group_mask = _get_group_mask(commands, seq_dim=0)

            src = self.embedding(commands, args, group_mask)

            if self.cfg.model_type == "transformer":
                key_padding_mask = _get_key_padding_mask(commands, seq_dim=0)
                out = self.decoder(src, z, tgt_mask=self.square_subsequent_mask[:S, :S], tgt_key_padding_mask=key_padding_mask, memory2=l)
            else:  # "lstm"
                hidden_cell = self._get_initial_state(z)  # TODO: reinject intermediate state
                out, _ = self.decoder(src, hidden_cell)

        else:  # "one_shot"
            if self.cfg.connect_through:
                z = rearrange(z, 'p c b d -> c (p b) d')
                z = z[1:, ...]

            src = self.embedding(z)
            out = self.decoder(src, z, tgt_mask=None, tgt_key_padding_mask=None, memory2=l)
            # print('D1 out', out.shape)

        if self.cfg.args_decoder:
            # NOTE: this branch assumes command_fcn, argument_decoder and argument_fcn
            # are defined elsewhere; they are not created in __init__ above.
            command_logits = self.command_fcn(out)
            z = torch.argmax(command_logits, dim=-1).unsqueeze(-1).float()
            src = self.argument_embedding(z)
            # print('D0 PE src', src.shape)
            # print('D0 con z', z.shape)
            out = self.argument_decoder(src, z, tgt_mask=None, tgt_key_padding_mask=None, memory2=l)
            # print('D0 out', out.shape)
            args_logits = self.argument_fcn(out)
        else:
            # command_logits, args_logits = self.fcn(cmd_out, args_out)
            command_logits, args_logits = self.fcn(out)

        out_logits = (command_logits, args_logits) + ((hierarch_logits,) if self.cfg.decode_stages == 2 else ())

        return _unpack_group_batch(N, *out_logits)

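In the one_shot branch above, the decoder input is not a token sequence at all: ConstEmbedding produces a purely positional query sequence, and the latent enters only through the global conditioning path of TransformerDecoderLayerGlobalImproved. A shape-level sketch of that broadcast (hypothetical sizes):

import torch
import torch.nn as nn

seq_len, N, d_model, dim_z = 31, 4, 256, 64
z = torch.randn(1, N, dim_z)                     # a single latent vector per sample

pos_queries = torch.zeros(seq_len, N, d_model)   # ConstEmbedding output, before the positional LUT
linear_global = nn.Linear(dim_z, d_model)

# the latent is projected and broadcast-added at every decoding position
out = pos_queries + linear_global(z)
print(out.shape)  # torch.Size([31, 4, 256])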
345 |
+
class SVGTransformer(nn.Module):
|
346 |
+
def __init__(self, cfg: _DefaultConfig):
|
347 |
+
super(SVGTransformer, self).__init__()
|
348 |
+
|
349 |
+
self.cfg = cfg
|
350 |
+
# self.args_dim = 2 * cfg.args_dim if cfg.rel_targets else cfg.args_dim + 1 # 257
|
351 |
+
if cfg.rel_targets:
|
352 |
+
args_dim = 2 * cfg.args_dim
|
353 |
+
if cfg.bin_targets:
|
354 |
+
args_dim = 8
|
355 |
+
else:
|
356 |
+
args_dim = cfg.args_dim + 1
|
357 |
+
|
358 |
+
if self.cfg.encode_stages > 0:
|
359 |
+
|
360 |
+
self.encoder = Encoder(cfg)
|
361 |
+
|
362 |
+
if cfg.use_resnet:
|
363 |
+
self.resnet = ResNet(cfg.d_model)
|
364 |
+
|
365 |
+
if cfg.use_vae:
|
366 |
+
self.vae = VAE(cfg)
|
367 |
+
else:
|
368 |
+
self.bottleneck = Bottleneck(cfg)
|
369 |
+
# self.bottleneck2 = Bottleneck(cfg)
|
370 |
+
self.encoder_norm = LayerNorm(cfg.dim_z, elementwise_affine=False)
|
371 |
+
|
372 |
+
if cfg.use_vqvae:
|
373 |
+
self.vqvae = VectorQuantize(
|
374 |
+
dim = cfg.dim_z,
|
375 |
+
codebook_size = cfg.codebook_size,
|
376 |
+
decay = 0.8,
|
377 |
+
commitment_weight = 0.,
|
378 |
+
use_cosine_sim = cfg.use_cosine_sim,
|
379 |
+
)
|
380 |
+
|
381 |
+
self.decoder = Decoder(cfg)
|
382 |
+
|
383 |
+
# 定义 self.cmd_args_mask, 但是分配一块持久性缓冲区
|
384 |
+
self.register_buffer("cmd_args_mask", SVGTensor.CMD_ARGS_MASK)
|
385 |
+
|
386 |
+
def perfect_matching(self, command_logits, args_logits, hierarch_logits, tgt_commands, tgt_args):
|
387 |
+
with torch.no_grad():
|
388 |
+
N, G, S, n_args = tgt_args.shape
|
389 |
+
visibility_mask = _get_visibility_mask(tgt_commands, seq_dim=-1)
|
390 |
+
padding_mask = _get_padding_mask(tgt_commands, seq_dim=-1, extended=True) * visibility_mask.unsqueeze(-1)
|
391 |
+
|
392 |
+
# Unsqueeze
|
393 |
+
tgt_commands, tgt_args, tgt_hierarch = tgt_commands.unsqueeze(2), tgt_args.unsqueeze(2), visibility_mask.unsqueeze(2)
|
394 |
+
command_logits, args_logits, hierarch_logits = command_logits.unsqueeze(1), args_logits.unsqueeze(1), hierarch_logits.unsqueeze(1).squeeze(-2)
|
395 |
+
|
396 |
+
# Loss
|
397 |
+
tgt_hierarch, hierarch_logits = tgt_hierarch.repeat(1, 1, self.cfg.num_groups_proposal), hierarch_logits.repeat(1, G, 1, 1)
|
398 |
+
tgt_commands, command_logits = tgt_commands.repeat(1, 1, self.cfg.num_groups_proposal, 1), command_logits.repeat(1, G, 1, 1, 1)
|
399 |
+
tgt_args, args_logits = tgt_args.repeat(1, 1, self.cfg.num_groups_proposal, 1, 1), args_logits.repeat(1, G, 1, 1, 1, 1)
|
400 |
+
|
401 |
+
padding_mask, mask = padding_mask.unsqueeze(2).repeat(1, 1, self.cfg.num_groups_proposal, 1), self.cmd_args_mask[tgt_commands.long()]
|
402 |
+
|
403 |
+
loss_args = F.cross_entropy(args_logits.reshape(-1, self.args_dim), tgt_args.reshape(-1).long() + 1, reduction="none").reshape(N, G, self.cfg.num_groups_proposal, S, n_args) # shift due to -1 PAD_VAL
|
404 |
+
loss_cmd = F.cross_entropy(command_logits.reshape(-1, self.cfg.n_commands), tgt_commands.reshape(-1).long(), reduction="none").reshape(N, G, self.cfg.num_groups_proposal, S)
|
405 |
+
loss_hierarch = F.cross_entropy(hierarch_logits.reshape(-1, 2), tgt_hierarch.reshape(-1).long(), reduction="none").reshape(N, G, self.cfg.num_groups_proposal)
|
406 |
+
|
407 |
+
loss_args = (loss_args * mask).sum(dim=[-1, -2]) / mask.sum(dim=[-1, -2])
|
408 |
+
loss_cmd = (loss_cmd * padding_mask).sum(dim=-1) / padding_mask.sum(dim=-1)
|
409 |
+
|
410 |
+
loss = 2.0 * loss_args + 1.0 * loss_cmd + 1.0 * loss_hierarch
|
411 |
+
|
412 |
+
# Iterate over the batch-dimension
|
413 |
+
assignment_list = []
|
414 |
+
|
415 |
+
full_set = set(range(self.cfg.num_groups_proposal))
|
416 |
+
for i in range(N):
|
417 |
+
costs = loss[i]
|
418 |
+
mask = visibility_mask[i]
|
419 |
+
_, assign = linear_sum_assignment(costs[mask].cpu())
|
420 |
+
assign = assign.tolist()
|
421 |
+
assignment_list.append(assign + list(full_set - set(assign)))
|
422 |
+
|
423 |
+
assignment = torch.tensor(assignment_list, device=command_logits.device)
|
424 |
+
|
425 |
+
return assignment.unsqueeze(-1).unsqueeze(-1)
|
426 |
+
|
427 |
+
    @property
    def origin_empty_path(self):
        return torch.tensor([
            11, 16, 7, 23, 24, 10, 13, 5, 1, 8, 3, 3, 7, 15, 7, 18, 15, 31,
            21, 31, 16, 10, 2, 14, 26, 14, 6, 13, 7, 28, 11, 19, 9, 6, 7, 1,
            22, 31, 21, 4, 21, 6, 1, 4, 15, 13, 10, 19, 9, 13, 21, 29, 12, 13,
            10, 23, 15, 11, 1, 18, 19, 5, 23, 20, 7, 29, 13, 15, 22, 31, 17, 10,
            21, 28, 13, 20, 24, 30, 21, 28, 5, 22, 14, 15, 3, 7, 14, 1, 19, 23,
            30, 25, 26, 27, 11, 23, 8, 6, 3, 31, 28, 29, 11, 1, 3, 6, 4, 12,
            12, 25, 0, 18, 5, 26, 5, 12, 23, 14, 19, 25, 12, 20, 2, 3, 18, 11,
            1, 12
        ])

    # for dalle usage
    # indices = model.get_codebook_indices(*model_args)
    # commands_y, args_y = model.decode(indices)
    @torch.no_grad()
    @eval_decorator
    def get_codebook_indices(self, commands_enc, args_enc, commands_dec, args_dec):
        indices = self(commands_enc, args_enc, commands_dec, args_dec, return_indices=True)
        return indices

    @torch.no_grad()
    @eval_decorator
    def decode(self, codebook_indices):
        torch.set_printoptions(profile='full')
        print(codebook_indices.reshape(self.cfg.max_num_groups, self.cfg.max_seq_len + 2))
        z = self.vqvae.codebook[codebook_indices]  # shape [batch_size, num_of_indices, codebook_dim]
        # args_z = self.args_vqvae.codebook[codebook_indices]

        batch_size = z.shape[0]
        z = z.reshape(self.cfg.max_num_groups, -1, batch_size, self.cfg.dim_z)

        out_logits = self.decoder(z, None, None)
        out_logits = _make_batch_first(*out_logits)

        res = {
            "command_logits": out_logits[0],   # shape [batch_size, path_num, command_num + 1, 5]
            "args_logits": out_logits[1],      # shape [batch_size, path_num, command_num + 1, 6]
            "visibility_logits": out_logits[2]
        }

        # hack
        # commands_y, args_y, _ = self.greedy_sample(res=res, commands_dec=cmd_indices)
        commands_y, args_y, _ = self.greedy_sample(res=res)

        # visualization, but it is not responsible for decode()
        # tensor_pred = SVGTensor.from_cmd_args(commands_y[0].cpu(), args_y[0].cpu())
        # svg_path_sample = SVG.from_tensor(tensor_pred.data, viewbox=Bbox(256), allow_empty=True).normalize().zoom(1.5)
        # svg_path_sample.fill_(True)
        # svg_path_sample.save_svg('test.svg')

        return commands_y, args_y

    def forward(self, commands_enc, args_enc, commands_dec, args_dec, label=None,
                z=None, hierarch_logits=None,
                return_tgt=True, params=None, encode_mode=False, return_hierarch=False, return_indices=False):
        # commands_enc contains the command types
        # commands_enc.shape: [batch_size, max_num_groups, max_seq_len + 2]
        # args_enc.shape: [batch_size, max_num_groups, max_seq_len + 2, n_args]
        # commands_dec.shape: [batch_size, max_num_groups, max_seq_len + 2]
        # args_dec.shape: [batch_size, max_num_groups, max_seq_len + 2, n_args]
        # assert args_enc.equal(args_dec)
        commands_enc, args_enc = _make_seq_first(commands_enc, args_enc)  # Possibly None, None
        commands_dec_, args_dec_ = _make_seq_first(commands_dec, args_dec)
        # commands_enc.shape: [max_seq_len + 2, max_num_groups, batch_size]
        # args_enc.shape: [max_seq_len + 2, max_num_groups, batch_size, 11]

        if z is None:
            z = self.encoder(commands_enc, args_enc, label)
            # cmd_z, args_z = self.encoder(commands_enc, args_enc, label)
            # print('encoded z', z.shape)

            if self.cfg.use_resnet:
                z = self.resnet(z)

            if self.cfg.use_vae:
                z, mu, logsigma = self.vae(z)
            else:
                # z = self.bottleneck(z)
                z = self.encoder_norm(self.bottleneck(z))
                # cmd_z = self.encoder_norm(self.bottleneck(cmd_z))
                # args_z = self.encoder_norm(self.bottleneck2(args_z))
                # print('bottleneck z', z)
                # print('normed z', z, z.shape)

            if self.cfg.use_vqvae or self.cfg.use_rqvae:
                # initial z.shape [num_path, 1, batch_size, dim_z]
                # batch_size, max_num_groups = cmd_z.shape[2], cmd_z.shape[0]
                batch_size, max_num_groups = z.shape[2], z.shape[0]

                # print(z.shape)
                # z = z.reshape(batch_size, -1, self.cfg.dim_z)
                # z = z.reshape(max_num_groups, -1, self.cfg.dim_z)
                z = rearrange(z, 'p c b z -> b (p c) z')
                # cmd_z = cmd_z.reshape(batch_size, -1, self.cfg.dim_z)
                # args_z = args_z.reshape(batch_size, -1, self.cfg.dim_z)
                # print(z.shape)

                # z = rearrange(z, 'p 1 b d -> b 1 p d')  # p: num_of_path, b: batch_size, d: dim_z
                # z = self.conv_enc_layer(z)
                # z = rearrange(z, 'b c p d -> b (p d) c')  # b d c: batch_size, dim_z, num_channel

                if self.cfg.use_vqvae:
                    quantized, indices, commit_loss = self.vqvae(z)  # tokenization
                else:
                    quantized, indices, commit_loss = self.rqvae(z)

                if return_indices:
                    return indices

                # z = rearrange(quantized, 'b (p d) c -> b c p d', p = max_num_groups if self.cfg.quantize_path else 1)
                # z = self.conv_dec_layer(z)
                # z = rearrange(z, 'b 1 p d -> p 1 b d')
                # z = quantized.reshape(max_num_groups, -1, batch_size, self.cfg.dim_z)
                z = rearrange(quantized, 'b (p c) z -> p c b z', p = max_num_groups)

                # cmd_z = cmd_quantized.reshape(max_num_groups, -1, batch_size, self.cfg.dim_z)
                # args_z = args_quantized.reshape(max_num_groups, -1, batch_size, self.cfg.dim_z)
                # print(indices)
                # print('quantized z', z.shape)
        else:
            z = _make_seq_first(z)

        if encode_mode: return z

        if return_tgt:  # Train mode
            # remove EOS command
            # [max_seq_len + 1, max_num_groups, batch_size]
            commands_dec_, args_dec_ = commands_dec_[:-1], args_dec_[:-1]

        out_logits = self.decoder(z, commands_dec_, args_dec_, label, hierarch_logits=hierarch_logits,
                                  return_hierarch=return_hierarch)

        if return_hierarch:
            return out_logits

        out_logits = _make_batch_first(*out_logits)

        if return_tgt and self.cfg.self_match:  # Assignment
            assert self.cfg.decode_stages == 2  # Self-matching expects two-stage decoder
            command_logits, args_logits, hierarch_logits = out_logits

            assignment = self.perfect_matching(command_logits, args_logits, hierarch_logits, commands_dec[..., 1:], args_dec[..., 1:, :])

            command_logits = torch.gather(command_logits, dim=1, index=assignment.expand_as(command_logits))
            args_logits = torch.gather(args_logits, dim=1, index=assignment.unsqueeze(-1).expand_as(args_logits))
            hierarch_logits = torch.gather(hierarch_logits, dim=1, index=assignment.expand_as(hierarch_logits))

            out_logits = (command_logits, args_logits, hierarch_logits)

        res = {
            "command_logits": out_logits[0],
            "args_logits": out_logits[1]
        }

        if self.cfg.decode_stages == 2:
            res["visibility_logits"] = out_logits[2]

        if return_tgt:
            res["tgt_commands"] = commands_dec
            res["tgt_args"] = args_dec

            if self.cfg.use_vae:
                res["mu"] = _make_batch_first(mu)
                res["logsigma"] = _make_batch_first(logsigma)

            if self.cfg.use_vqvae:
                res["vqvae_loss"] = commit_loss
        return res
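
    # Shape sanity-check for the rearranges around the quantizer
    # (illustrative sizes, not the trained model's):
    #   z = torch.randn(8, 1, 4, 256)                    # [p, c, b, dim_z]
    #   flat = rearrange(z, 'p c b z -> b (p c) z')      # [4, 8, 256]: one token per path
    #   assert torch.equal(rearrange(flat, 'b (p c) z -> p c b z', p=8), z)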
    def greedy_sample(self, commands_enc=None, args_enc=None, commands_dec=None, args_dec=None, label=None,
                      z=None, hierarch_logits=None,
                      concat_groups=True, temperature=0.0001, res=None):
        if self.cfg.pred_mode == "one_shot":
            if res is None:
                res = self.forward(commands_enc, args_enc, commands_dec, args_dec, label=label, z=z, hierarch_logits=hierarch_logits, return_tgt=True)

            commands_y = _sample_categorical(temperature, res["command_logits"])
            # hack
            # commands_y = commands_dec.reshape(1, 8, 32)[..., 1:]
            if self.cfg.abs_targets:
                # in this case the args need no sampling
                # the model may output -1 directly, so args_y -= 1 is not needed
                # but SVG coordinates live in the range 0-255, so we still need to clamp and convert to integers by hand
                # positions that should hold "-1" get filtered out by the mask in _make_valid
                # args_y = torch.clamp(res['args_logits'], min=0, max=255).int()
                # args_y = torch.clamp(res['args_logits'], min=0, max=256)
                # args_y = (res['args_logits'] + 1) * 128 - 1
                args_y = (res['args_logits'] + 1) * 12
            elif self.cfg.bin_targets:
                # the args need no sampling here either
                # we need a threshold: logits < threshold map to 0, logits >= threshold map to 1
                threshold = 0.0
                args_logits = res['args_logits']
                args_y = torch.where(args_logits > threshold, torch.ones_like(args_logits), torch.zeros_like(args_logits))
                args_y = bit2int(args_y)
            else:
                args_y = _sample_categorical(temperature, res["args_logits"])
                args_y -= 1  # shift due to -1 PAD_VAL

            visibility_y = _threshold_sample(res["visibility_logits"], threshold=0.7).bool().squeeze(-1) if self.cfg.decode_stages == 2 else None
            commands_y, args_y = self._make_valid(commands_y, args_y, visibility_y)
        else:
            if z is None:
                z = self.forward(commands_enc, args_enc, None, None, label=label, encode_mode=True)

            PAD_VAL = 0
            commands_y, args_y = z.new_zeros(1, 1, 1).fill_(SVGTensor.COMMANDS_SIMPLIFIED.index("SOS")).long(), z.new_ones(1, 1, 1, self.cfg.n_args).fill_(PAD_VAL).long()

            for i in range(self.cfg.max_total_len):
                res = self.forward(None, None, commands_y, args_y, label=label, z=z, hierarch_logits=hierarch_logits, return_tgt=False)
                commands_new_y, args_new_y = _sample_categorical(temperature, res["command_logits"], res["args_logits"])
                args_new_y -= 1  # shift due to -1 PAD_VAL
                _, args_new_y = self._make_valid(commands_new_y, args_new_y)

                commands_y, args_y = torch.cat([commands_y, commands_new_y[..., -1:]], dim=-1), torch.cat([args_y, args_new_y[..., -1:, :]], dim=-2)

            commands_y, args_y = commands_y[..., 1:], args_y[..., 1:, :]  # Discard SOS token

        if self.cfg.rel_targets:
            args_y = self._make_absolute(commands_y, args_y)

        if concat_groups:
            N = commands_y.size(0)
            # must use commands_y here, not tgt_commands,
            # because commands_y may contain extra EOS tokens, and EOS cannot be visualized
            padding_mask_y = _get_padding_mask(commands_y, seq_dim=-1).bool()
            commands_y, args_y = commands_y[padding_mask_y].reshape(N, -1), args_y[padding_mask_y].reshape(N, -1, self.cfg.n_args)

        return commands_y, args_y, res

    def _make_valid(self, commands_y, args_y, visibility_y=None, PAD_VAL=0):
        if visibility_y is not None:
            S = commands_y.size(-1)
            commands_y[~visibility_y] = commands_y.new_tensor([SVGTensor.COMMANDS_SIMPLIFIED.index("m"), *[SVGTensor.COMMANDS_SIMPLIFIED.index("EOS")] * (S - 1)])
            args_y[~visibility_y] = PAD_VAL

        mask = self.cmd_args_mask[commands_y.long()].bool()
        args_y[~mask] = PAD_VAL

        return commands_y, args_y

    def _make_absolute(self, commands_y, args_y):

        mask = self.cmd_args_mask[commands_y.long()].bool()
        args_y[mask] -= self.cfg.args_dim - 1

        real_commands = commands_y < SVGTensor.COMMANDS_SIMPLIFIED.index("EOS")

        args_real_commands = args_y[real_commands]
        end_pos = args_real_commands[:-1, SVGTensor.IndexArgs.END_POS].cumsum(dim=0)

        args_real_commands[1:, SVGTensor.IndexArgs.CONTROL1] += end_pos
        args_real_commands[1:, SVGTensor.IndexArgs.CONTROL2] += end_pos
        args_real_commands[1:, SVGTensor.IndexArgs.END_POS] += end_pos

        args_y[real_commands] = args_real_commands

        _, args_y = self._make_valid(commands_y, args_y)

        return args_y
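
The commented "for dalle usage" recipe above amounts to the following round trip; a minimal sketch assuming `model` is a trained instance of this class and the four tensors come from the dataloader with the shapes documented in forward():

indices = model.get_codebook_indices(commands_enc, args_enc, commands_dec, args_dec)
commands_y, args_y = model.decode(indices)  # greedy-sampled commands and arguments
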
deepsvg/model/utils.py
ADDED
@@ -0,0 +1,84 @@
import torch
from deepsvg.difflib.tensor import SVGTensor
from torch.distributions.categorical import Categorical
import torch.nn.functional as F


def _get_key_padding_mask(commands, seq_dim=0):
    """
    Args:
        commands: Shape [S, ...]
    """
    with torch.no_grad():
        key_padding_mask = (commands == SVGTensor.COMMANDS_SIMPLIFIED.index("EOS")).cumsum(dim=seq_dim) > 0

        if seq_dim == 0:
            return key_padding_mask.transpose(0, 1)
        return key_padding_mask


def _get_padding_mask(commands, seq_dim=0, extended=False):
    with torch.no_grad():
        padding_mask = (commands == SVGTensor.COMMANDS_SIMPLIFIED.index("EOS")).cumsum(dim=seq_dim) == 0
        padding_mask = padding_mask.float()

        if extended:
            # padding_mask doesn't include the final EOS, extend by 1 position to include it in the loss
            S = commands.size(seq_dim)
            torch.narrow(padding_mask, seq_dim, 3, S-3).add_(torch.narrow(padding_mask, seq_dim, 0, S-3)).clamp_(max=1)

        if seq_dim == 0:
            return padding_mask.unsqueeze(-1)
        return padding_mask


def _get_group_mask(commands, seq_dim=0):
    """
    Args:
        commands: Shape [S, ...]
    """
    with torch.no_grad():
        group_mask = (commands == SVGTensor.COMMANDS_SIMPLIFIED.index("m")).cumsum(dim=seq_dim)
        return group_mask


def _get_visibility_mask(commands, seq_dim=0):
    """
    Args:
        commands: Shape [S, ...]
    """
    S = commands.size(seq_dim)
    with torch.no_grad():
        visibility_mask = (commands == SVGTensor.COMMANDS_SIMPLIFIED.index("EOS")).sum(dim=seq_dim) < S - 1

    if seq_dim == 0:
        return visibility_mask.unsqueeze(-1)
    return visibility_mask


def _get_key_visibility_mask(commands, seq_dim=0):
    S = commands.size(seq_dim)
    with torch.no_grad():
        key_visibility_mask = (commands == SVGTensor.COMMANDS_SIMPLIFIED.index("EOS")).sum(dim=seq_dim) >= S - 1

    if seq_dim == 0:
        return key_visibility_mask.transpose(0, 1)
    return key_visibility_mask


def _generate_square_subsequent_mask(sz):
    mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
    mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask


def _sample_categorical(temperature=0.0001, *args_logits):
    if len(args_logits) == 1:
        arg_logits, = args_logits
        return Categorical(logits=arg_logits / temperature).sample()
    return (*(Categorical(logits=arg_logits / temperature).sample() for arg_logits in args_logits),)


def _threshold_sample(arg_logits, threshold=0.5, temperature=1.0):
    scores = F.softmax(arg_logits / temperature, dim=-1)[..., 1]
    return scores > threshold
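
To make the EOS-based masking above concrete, here is a small sketch, run in the context of the module above (the literal 4 for the EOS index is an assumption for illustration; the real code looks it up via SVGTensor.COMMANDS_SIMPLIFIED.index("EOS")):

import torch

EOS = 4  # assumed EOS index, for illustration only
commands = torch.tensor([[0, 1, 1, EOS, EOS, EOS]])  # one sequence, seq_dim=-1

# everything before the first EOS counts as real content
padding_mask = ((commands == EOS).cumsum(dim=-1) == 0).float()
print(padding_mask)  # tensor([[1., 1., 1., 0., 0., 0.]])

# causal decoder mask: 0 on/below the diagonal, -inf above
print(_generate_square_subsequent_mask(4))
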
deepsvg/model/vector_quantize_pytorch.py
ADDED
@@ -0,0 +1,605 @@
import torch
from torch import nn, einsum
import torch.nn.functional as F
import torch.distributed as distributed
from torch.cuda.amp import autocast

from einops import rearrange, repeat
from contextlib import contextmanager

def exists(val):
    return val is not None

def default(val, d):
    return val if exists(val) else d

def noop(*args, **kwargs):
    pass

def l2norm(t):
    return F.normalize(t, p = 2, dim = -1)

def log(t, eps = 1e-20):
    return torch.log(t.clamp(min = eps))

def uniform_init(*shape):
    t = torch.empty(shape)
    nn.init.kaiming_uniform_(t)
    return t

def gumbel_noise(t):
    noise = torch.zeros_like(t).uniform_(0, 1)
    return -log(-log(noise))

def gumbel_sample(t, temperature = 1., dim = -1):
    if temperature == 0:
        return t.argmax(dim = dim)

    return ((t / temperature) + gumbel_noise(t)).argmax(dim = dim)

def ema_inplace(moving_avg, new, decay):
    moving_avg.data.mul_(decay).add_(new, alpha = (1 - decay))

def laplace_smoothing(x, n_categories, eps = 1e-5):
    return (x + eps) / (x.sum() + n_categories * eps)

def sample_vectors(samples, num):
    num_samples, device = samples.shape[0], samples.device
    if num_samples >= num:
        indices = torch.randperm(num_samples, device = device)[:num]
    else:
        indices = torch.randint(0, num_samples, (num,), device = device)

    return samples[indices]

def batched_sample_vectors(samples, num):
    return torch.stack([sample_vectors(sample, num) for sample in samples.unbind(dim = 0)], dim = 0)

def pad_shape(shape, size, dim = 0):
    return [size if i == dim else s for i, s in enumerate(shape)]

def sample_multinomial(total_count, probs):
    device = probs.device
    probs = probs.cpu()

    total_count = probs.new_full((), total_count)
    remainder = probs.new_ones(())
    sample = torch.empty_like(probs, dtype = torch.long)

    for i, p in enumerate(probs):
        s = torch.binomial(total_count, p / remainder)
        sample[i] = s
        total_count -= s
        remainder -= p

    return sample.to(device)

def all_gather_sizes(x, dim):
    size = torch.tensor(x.shape[dim], dtype = torch.long, device = x.device)
    all_sizes = [torch.empty_like(size) for _ in range(distributed.get_world_size())]
    distributed.all_gather(all_sizes, size)
    return torch.stack(all_sizes)

def all_gather_variably_sized(x, sizes, dim = 0):
    rank = distributed.get_rank()
    all_x = []

    for i, size in enumerate(sizes):
        t = x if i == rank else x.new_empty(pad_shape(x.shape, size, dim))
        distributed.broadcast(t, src = i, async_op = True)
        all_x.append(t)

    distributed.barrier()
    return all_x

def sample_vectors_distributed(local_samples, num):
    local_samples = rearrange(local_samples, '1 ... -> ...')

    rank = distributed.get_rank()
    all_num_samples = all_gather_sizes(local_samples, dim = 0)

    if rank == 0:
        samples_per_rank = sample_multinomial(num, all_num_samples / all_num_samples.sum())
    else:
        samples_per_rank = torch.empty_like(all_num_samples)

    distributed.broadcast(samples_per_rank, src = 0)
    samples_per_rank = samples_per_rank.tolist()

    local_samples = sample_vectors(local_samples, samples_per_rank[rank])
    all_samples = all_gather_variably_sized(local_samples, samples_per_rank, dim = 0)
    out = torch.cat(all_samples, dim = 0)

    return rearrange(out, '... -> 1 ...')

def batched_bincount(x, *, minlength):
    batch, dtype, device = x.shape[0], x.dtype, x.device
    target = torch.zeros(batch, minlength, dtype = dtype, device = device)
    values = torch.ones_like(x)
    target.scatter_add_(-1, x, values)
    return target

def kmeans(
    samples,
    num_clusters,
    num_iters = 10,
    use_cosine_sim = False,
    sample_fn = batched_sample_vectors,
    all_reduce_fn = noop
):
    num_codebooks, dim, dtype, device = samples.shape[0], samples.shape[-1], samples.dtype, samples.device

    means = sample_fn(samples, num_clusters)

    for _ in range(num_iters):
        if use_cosine_sim:
            dists = samples @ rearrange(means, 'h n d -> h d n')
        else:
            dists = -torch.cdist(samples, means, p = 2)

        buckets = torch.argmax(dists, dim = -1)
        bins = batched_bincount(buckets, minlength = num_clusters)
        all_reduce_fn(bins)

        zero_mask = bins == 0
        bins_min_clamped = bins.masked_fill(zero_mask, 1)

        new_means = buckets.new_zeros(num_codebooks, num_clusters, dim, dtype = dtype)

        new_means.scatter_add_(1, repeat(buckets, 'h n -> h n d', d = dim), samples)
        new_means = new_means / rearrange(bins_min_clamped, '... -> ... 1')
        all_reduce_fn(new_means)

        if use_cosine_sim:
            new_means = l2norm(new_means)

        means = torch.where(
            rearrange(zero_mask, '... -> ... 1'),
            means,
            new_means
        )

    return means, bins

def batched_embedding(indices, embeds):
    batch, dim = indices.shape[1], embeds.shape[-1]
    indices = repeat(indices, 'h b n -> h b n d', d = dim)
    embeds = repeat(embeds, 'h c d -> h b c d', b = batch)
    return embeds.gather(2, indices)

# regularization losses

def orthogonal_loss_fn(t):
    # eq (2) from https://arxiv.org/abs/2112.00384
    h, n = t.shape[:2]
    normed_codes = l2norm(t)
    cosine_sim = einsum('h i d, h j d -> h i j', normed_codes, normed_codes)
    return (cosine_sim ** 2).sum() / (h * n ** 2) - (1 / n)

# distance types

class EuclideanCodebook(nn.Module):
    def __init__(
        self,
        dim,
        codebook_size,
        num_codebooks = 1,
        kmeans_init = False,
        kmeans_iters = 10,
        sync_kmeans = True,
        decay = 0.8,
        eps = 1e-5,
        threshold_ema_dead_code = 2,
        use_ddp = False,
        learnable_codebook = False,
        sample_codebook_temp = 0
    ):
        super().__init__()
        self.decay = decay
        init_fn = uniform_init if not kmeans_init else torch.zeros
        embed = init_fn(num_codebooks, codebook_size, dim)

        self.codebook_size = codebook_size
        self.num_codebooks = num_codebooks

        self.kmeans_iters = kmeans_iters
        self.eps = eps
        self.threshold_ema_dead_code = threshold_ema_dead_code
        self.sample_codebook_temp = sample_codebook_temp

        assert not (use_ddp and num_codebooks > 1 and kmeans_init), 'kmeans init is not compatible with multiple codebooks in distributed environment for now'

        self.sample_fn = sample_vectors_distributed if use_ddp and sync_kmeans else batched_sample_vectors
        self.kmeans_all_reduce_fn = distributed.all_reduce if use_ddp and sync_kmeans else noop
        self.all_reduce_fn = distributed.all_reduce if use_ddp else noop

        self.register_buffer('initted', torch.Tensor([not kmeans_init]))
        self.register_buffer('cluster_size', torch.zeros(num_codebooks, codebook_size))
        self.register_buffer('embed_avg', embed.clone())

        self.learnable_codebook = learnable_codebook
        if learnable_codebook:
            self.embed = nn.Parameter(embed)
        else:
            self.register_buffer('embed', embed)

    @torch.jit.ignore
    def init_embed_(self, data):
        if self.initted:
            return

        embed, cluster_size = kmeans(
            data,
            self.codebook_size,
            self.kmeans_iters,
            sample_fn = self.sample_fn,
            all_reduce_fn = self.kmeans_all_reduce_fn
        )

        self.embed.data.copy_(embed)
        self.embed_avg.data.copy_(embed.clone())
        self.cluster_size.data.copy_(cluster_size)
        self.initted.data.copy_(torch.Tensor([True]))

    def replace(self, batch_samples, batch_mask):
        batch_samples = l2norm(batch_samples)

        for ind, (samples, mask) in enumerate(zip(batch_samples.unbind(dim = 0), batch_mask.unbind(dim = 0))):
            if not torch.any(mask):
                continue

            sampled = self.sample_fn(rearrange(samples, '... -> 1 ...'), mask.sum().item())
            self.embed.data[ind][mask] = rearrange(sampled, '1 ... -> ...')

    def expire_codes_(self, batch_samples):
        if self.threshold_ema_dead_code == 0:
            return

        expired_codes = self.cluster_size < self.threshold_ema_dead_code

        if not torch.any(expired_codes):
            return

        batch_samples = rearrange(batch_samples, 'h ... d -> h (...) d')
        self.replace(batch_samples, batch_mask = expired_codes)

    @autocast(enabled = False)
    def forward(self, x):
        needs_codebook_dim = x.ndim < 4

        x = x.float()

        if needs_codebook_dim:
            x = rearrange(x, '... -> 1 ...')

        shape, dtype = x.shape, x.dtype
        flatten = rearrange(x, 'h ... d -> h (...) d')

        self.init_embed_(flatten)

        embed = self.embed if not self.learnable_codebook else self.embed.detach()

        dist = -torch.cdist(flatten, embed, p = 2)

        embed_ind = gumbel_sample(dist, dim = -1, temperature = self.sample_codebook_temp)
        embed_onehot = F.one_hot(embed_ind, self.codebook_size).type(dtype)
        embed_ind = embed_ind.view(*shape[:-1])

        quantize = batched_embedding(embed_ind, self.embed)

        if self.training:
            cluster_size = embed_onehot.sum(dim = 1)

            self.all_reduce_fn(cluster_size)
            ema_inplace(self.cluster_size, cluster_size, self.decay)

            embed_sum = einsum('h n d, h n c -> h c d', flatten, embed_onehot)
            self.all_reduce_fn(embed_sum.contiguous())
            ema_inplace(self.embed_avg, embed_sum, self.decay)

            cluster_size = laplace_smoothing(self.cluster_size, self.codebook_size, self.eps) * self.cluster_size.sum()

            embed_normalized = self.embed_avg / rearrange(cluster_size, '... -> ... 1')
            self.embed.data.copy_(embed_normalized)
            self.expire_codes_(x)

        if needs_codebook_dim:
            quantize, embed_ind = map(lambda t: rearrange(t, '1 ... -> ...'), (quantize, embed_ind))

        return quantize, embed_ind

class CosineSimCodebook(nn.Module):
    def __init__(
        self,
        dim,
        codebook_size,
        num_codebooks = 1,
        kmeans_init = False,
        kmeans_iters = 10,
        sync_kmeans = True,
        decay = 0.8,
        eps = 1e-5,
        threshold_ema_dead_code = 2,
        use_ddp = False,
        learnable_codebook = False,
        sample_codebook_temp = 0.
    ):
        super().__init__()
        self.decay = decay

        if not kmeans_init:
            embed = l2norm(uniform_init(num_codebooks, codebook_size, dim))
        else:
            embed = torch.zeros(num_codebooks, codebook_size, dim)

        self.codebook_size = codebook_size
        self.num_codebooks = num_codebooks

        self.kmeans_iters = kmeans_iters
        self.eps = eps
        self.threshold_ema_dead_code = threshold_ema_dead_code
        self.sample_codebook_temp = sample_codebook_temp

        self.sample_fn = sample_vectors_distributed if use_ddp and sync_kmeans else batched_sample_vectors
        self.kmeans_all_reduce_fn = distributed.all_reduce if use_ddp and sync_kmeans else noop
        self.all_reduce_fn = distributed.all_reduce if use_ddp else noop

        self.register_buffer('initted', torch.Tensor([not kmeans_init]))
        self.register_buffer('cluster_size', torch.zeros(num_codebooks, codebook_size))

        self.learnable_codebook = learnable_codebook
        if learnable_codebook:
            self.embed = nn.Parameter(embed)
        else:
            self.register_buffer('embed', embed)

    @torch.jit.ignore
    def init_embed_(self, data):
        if self.initted:
            return

        embed, cluster_size = kmeans(
            data,
            self.codebook_size,
            self.kmeans_iters,
            use_cosine_sim = True,
            sample_fn = self.sample_fn,
            all_reduce_fn = self.kmeans_all_reduce_fn
        )

        self.embed.data.copy_(embed)
        self.cluster_size.data.copy_(cluster_size)
        self.initted.data.copy_(torch.Tensor([True]))

    def replace(self, batch_samples, batch_mask):
        batch_samples = l2norm(batch_samples)

        for ind, (samples, mask) in enumerate(zip(batch_samples.unbind(dim = 0), batch_mask.unbind(dim = 0))):
            if not torch.any(mask):
                continue

            sampled = self.sample_fn(rearrange(samples, '... -> 1 ...'), mask.sum().item())
            self.embed.data[ind][mask] = rearrange(sampled, '1 ... -> ...')

    def expire_codes_(self, batch_samples):
        if self.threshold_ema_dead_code == 0:
            return

        expired_codes = self.cluster_size < self.threshold_ema_dead_code

        if not torch.any(expired_codes):
            return

        batch_samples = rearrange(batch_samples, 'h ... d -> h (...) d')
        self.replace(batch_samples, batch_mask = expired_codes)

    @autocast(enabled = False)
    def forward(self, x):
        needs_codebook_dim = x.ndim < 4

        x = x.float()

        if needs_codebook_dim:
            x = rearrange(x, '... -> 1 ...')

        shape, dtype = x.shape, x.dtype

        flatten = rearrange(x, 'h ... d -> h (...) d')
        flatten = l2norm(flatten)

        self.init_embed_(flatten)

        embed = self.embed if not self.learnable_codebook else self.embed.detach()
        embed = l2norm(embed)

        dist = einsum('h n d, h c d -> h n c', flatten, embed)
        embed_ind = gumbel_sample(dist, dim = -1, temperature = self.sample_codebook_temp)
        embed_onehot = F.one_hot(embed_ind, self.codebook_size).type(dtype)
        embed_ind = embed_ind.view(*shape[:-1])

        quantize = batched_embedding(embed_ind, self.embed)

        if self.training:
            bins = embed_onehot.sum(dim = 1)
            self.all_reduce_fn(bins)

            ema_inplace(self.cluster_size, bins, self.decay)

            zero_mask = (bins == 0)
            bins = bins.masked_fill(zero_mask, 1.)

            embed_sum = einsum('h n d, h n c -> h c d', flatten, embed_onehot)
            self.all_reduce_fn(embed_sum)

            embed_normalized = embed_sum / rearrange(bins, '... -> ... 1')
            embed_normalized = l2norm(embed_normalized)

            embed_normalized = torch.where(
                rearrange(zero_mask, '... -> ... 1'),
                embed,
                embed_normalized
            )

            ema_inplace(self.embed, embed_normalized, self.decay)
            self.expire_codes_(x)

        if needs_codebook_dim:
            quantize, embed_ind = map(lambda t: rearrange(t, '1 ... -> ...'), (quantize, embed_ind))

        return quantize, embed_ind

# main class

class VectorQuantize(nn.Module):
    def __init__(
        self,
        dim,
        codebook_size,
        codebook_dim = None,
        heads = 1,
        separate_codebook_per_head = False,
        decay = 0.8,
        eps = 1e-5,
        kmeans_init = False,
        kmeans_iters = 10,
        sync_kmeans = True,
        use_cosine_sim = False,
        threshold_ema_dead_code = 0,
        channel_last = True,
        accept_image_fmap = False,
        commitment_weight = 1.,
        orthogonal_reg_weight = 0.,
        orthogonal_reg_active_codes_only = False,
        orthogonal_reg_max_codes = None,
        sample_codebook_temp = 0.,
        sync_codebook = False
    ):
        super().__init__()
        self.heads = heads
        self.separate_codebook_per_head = separate_codebook_per_head

        codebook_dim = default(codebook_dim, dim)
        codebook_input_dim = codebook_dim * heads

        requires_projection = codebook_input_dim != dim
        self.project_in = nn.Linear(dim, codebook_input_dim) if requires_projection else nn.Identity()
        self.project_out = nn.Linear(codebook_input_dim, dim) if requires_projection else nn.Identity()

        self.eps = eps
        self.commitment_weight = commitment_weight

        has_codebook_orthogonal_loss = orthogonal_reg_weight > 0
        self.orthogonal_reg_weight = orthogonal_reg_weight
        self.orthogonal_reg_active_codes_only = orthogonal_reg_active_codes_only
        self.orthogonal_reg_max_codes = orthogonal_reg_max_codes

        codebook_class = EuclideanCodebook if not use_cosine_sim else CosineSimCodebook

        self._codebook = codebook_class(
            dim = codebook_dim,
            num_codebooks = heads if separate_codebook_per_head else 1,
            codebook_size = codebook_size,
            kmeans_init = kmeans_init,
            kmeans_iters = kmeans_iters,
            sync_kmeans = sync_kmeans,
            decay = decay,
            eps = eps,
            threshold_ema_dead_code = threshold_ema_dead_code,
            use_ddp = sync_codebook,
            learnable_codebook = has_codebook_orthogonal_loss,
            sample_codebook_temp = sample_codebook_temp
        )

        self.codebook_size = codebook_size

        self.accept_image_fmap = accept_image_fmap
        self.channel_last = channel_last

    @property
    def codebook(self):
        codebook = self._codebook.embed
        if self.separate_codebook_per_head:
            return codebook

        return rearrange(codebook, '1 ... -> ...')

    def forward(
        self,
        x,
        mask = None
    ):
        shape, device, heads, is_multiheaded, codebook_size = x.shape, x.device, self.heads, self.heads > 1, self.codebook_size

        need_transpose = not self.channel_last and not self.accept_image_fmap

        if self.accept_image_fmap:
            height, width = x.shape[-2:]
            x = rearrange(x, 'b c h w -> b (h w) c')

        if need_transpose:
            x = rearrange(x, 'b d n -> b n d')

        x = self.project_in(x)

        if is_multiheaded:
            ein_rhs_eq = 'h b n d' if self.separate_codebook_per_head else '1 (b h) n d'
            x = rearrange(x, f'b n (h d) -> {ein_rhs_eq}', h = heads)

        quantize, embed_ind = self._codebook(x)

        if self.training:
            quantize = x + (quantize - x).detach()

        loss = torch.tensor([0.], device = device, requires_grad = self.training)

        if self.training:
            if self.commitment_weight > 0:
                detached_quantize = quantize.detach()

                if exists(mask):
                    # with variable lengthed sequences
                    commit_loss = F.mse_loss(detached_quantize, x, reduction = 'none')

                    if is_multiheaded:
                        mask = repeat(mask, 'b n -> c (b h) n', c = commit_loss.shape[0], h = commit_loss.shape[1] // mask.shape[0])

                    commit_loss = commit_loss[mask].mean()
                else:
                    commit_loss = F.mse_loss(detached_quantize, x)

                loss = loss + commit_loss * self.commitment_weight

            if self.orthogonal_reg_weight > 0:
                codebook = self._codebook.embed

                if self.orthogonal_reg_active_codes_only:
                    # only calculate orthogonal loss for the activated codes for this batch
                    unique_code_ids = torch.unique(embed_ind)
                    codebook = codebook[unique_code_ids]

                num_codes = codebook.shape[0]
                if exists(self.orthogonal_reg_max_codes) and num_codes > self.orthogonal_reg_max_codes:
                    rand_ids = torch.randperm(num_codes, device = device)[:self.orthogonal_reg_max_codes]
                    codebook = codebook[rand_ids]

                orthogonal_reg_loss = orthogonal_loss_fn(codebook)
                loss = loss + orthogonal_reg_loss * self.orthogonal_reg_weight

        if is_multiheaded:
            if self.separate_codebook_per_head:
                quantize = rearrange(quantize, 'h b n d -> b n (h d)', h = heads)
                embed_ind = rearrange(embed_ind, 'h b n -> b n h', h = heads)
            else:
                quantize = rearrange(quantize, '1 (b h) n d -> b n (h d)', h = heads)
                embed_ind = rearrange(embed_ind, '1 (b h) n -> b n h', h = heads)

        quantize = self.project_out(quantize)

        if need_transpose:
            quantize = rearrange(quantize, 'b n d -> b d n')

        if self.accept_image_fmap:
            quantize = rearrange(quantize, 'b (h w) c -> b c h w', h = height, w = width)
            embed_ind = rearrange(embed_ind, 'b (h w) ... -> b h w ...', h = height, w = width)

        return quantize, embed_ind, loss
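
A minimal usage sketch for the quantizer above, mirroring the upstream vector-quantize-pytorch README (the dimensions are arbitrary):

import torch

vq = VectorQuantize(dim=256, codebook_size=512, decay=0.8, commitment_weight=1.)

x = torch.randn(2, 1024, 256)    # [batch, sequence, dim]
quantize, indices, loss = vq(x)  # quantized vectors, code indices, commitment loss
# quantize: (2, 1024, 256), indices: (2, 1024), loss: tensor of shape (1,)
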
deepsvg/schedulers/warmup.py
ADDED
@@ -0,0 +1,67 @@
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau


class GradualWarmupScheduler(_LRScheduler):
    """ Gradually warm-up (increasing) learning rate in optimizer.
    Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'.
    Args:
        optimizer (Optimizer): Wrapped optimizer.
        multiplier: target learning rate = base lr * multiplier if multiplier > 1.0. if multiplier = 1.0, lr starts from 0 and ends up with the base_lr.
        total_epoch: target learning rate is reached at total_epoch, gradually
        after_scheduler: after target_epoch, use this scheduler (e.g. ReduceLROnPlateau)
    """

    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
        self.multiplier = multiplier
        if self.multiplier < 1.:
            raise ValueError('multiplier should be greater than or equal to 1.')
        self.total_epoch = total_epoch
        self.after_scheduler = after_scheduler
        self.finished = False
        super(GradualWarmupScheduler, self).__init__(optimizer)

    def get_lr(self):
        if self.last_epoch > self.total_epoch:
            if self.after_scheduler:
                if not self.finished:
                    self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
                    self.finished = True
                return self.after_scheduler.get_last_lr()
            return [base_lr * self.multiplier for base_lr in self.base_lrs]

        if self.multiplier == 1.0:
            return [base_lr * (float(self.last_epoch) / self.total_epoch) for base_lr in self.base_lrs]
        else:
            return [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]

    def step_ReduceLROnPlateau(self, metrics, epoch=None):
        if epoch is None:
            epoch = self.last_epoch + 1
        self.last_epoch = epoch if epoch != 0 else 1  # ReduceLROnPlateau is called at the end of epoch, whereas others are called at beginning
        if self.last_epoch <= self.total_epoch:
            # warmup_lr = [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]
            if self.multiplier == 1.0:
                warmup_lr = [base_lr * (float(self.last_epoch) / self.total_epoch) for base_lr in self.base_lrs]
            else:
                warmup_lr = [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]
            for param_group, lr in zip(self.optimizer.param_groups, warmup_lr):
                param_group['lr'] = lr
        else:
            if epoch is None:
                self.after_scheduler.step(metrics, None)
            else:
                self.after_scheduler.step(metrics, epoch - self.total_epoch)

    def step(self, epoch=None, metrics=None):
        if type(self.after_scheduler) != ReduceLROnPlateau:
            if self.finished and self.after_scheduler:
                if epoch is None:
                    self.after_scheduler.step(None)
                else:
                    self.after_scheduler.step(epoch - self.total_epoch)
                self._last_lr = self.after_scheduler.get_last_lr()
            else:
                return super(GradualWarmupScheduler, self).step(epoch)
        else:
            self.step_ReduceLROnPlateau(metrics, epoch)
|
deepsvg/svg_dataset.py
ADDED
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from deepsvg.config import _Config
from deepsvg.difflib.tensor import SVGTensor
from deepsvg.svglib.svg import SVG
from deepsvg.svglib.geom import Point, Angle
# from deepsvg import utils

import math
import torch
import torch.utils.data
import random
from typing import List, Union
import pandas as pd
import os
import pickle
from sklearn.model_selection import train_test_split
Num = Union[int, float]


class SVGDataset(torch.utils.data.Dataset):
    def __init__(self, df, data_dir, model_args, max_num_groups, max_seq_len, max_total_len=None, PAD_VAL=0,
                 nb_augmentations=1, already_preprocessed=True):
        self.data_dir = data_dir

        self.already_preprocessed = already_preprocessed

        self.MAX_NUM_GROUPS = max_num_groups
        self.MAX_SEQ_LEN = max_seq_len
        self.MAX_TOTAL_LEN = max_total_len

        if max_total_len is None:
            self.MAX_TOTAL_LEN = max_num_groups * max_seq_len

        # if df is None:
        #     df = pd.read_csv(meta_filepath)

        # if len(df) > 0:
        #     if filter_uni is not None:
        #         df = df[df.uni.isin(filter_uni)]

        #     if filter_platform is not None:
        #         df = df[df.platform.isin(filter_platform)]

        #     if filter_category is not None:
        #         df = df[df.category.isin(filter_category)]

        #     df = df[(df.nb_groups <= max_num_groups) & (df.max_len_group <= max_seq_len)]
        #     if max_total_len is not None:
        #         df = df[df.total_len <= max_total_len]

        # self.df = df.sample(frac=train_ratio) if train_ratio < 1.0 else df
        self.df = df

        self.model_args = model_args

        self.PAD_VAL = PAD_VAL

        self.nb_augmentations = nb_augmentations

    def search_name(self, name):
        return self.df[self.df.commonName.str.contains(name)]

    def _filter_categories(self, filter_category):
        self.df = self.df[self.df.category.isin(filter_category)]

    @staticmethod
    def _uni_to_label(uni):
        if 48 <= uni <= 57:
            return uni - 48
        elif 65 <= uni <= 90:
            return uni - 65 + 10
        return uni - 97 + 36

    @staticmethod
    def _label_to_uni(label_id):
        if 0 <= label_id <= 9:
            return label_id + 48
        elif 10 <= label_id <= 35:
            return label_id + 65 - 10
        return label_id + 97 - 36

    @staticmethod
    def _category_to_label(category):
        categories = ['characters', 'free-icons', 'logos', 'alphabet', 'animals', 'arrows', 'astrology', 'baby', 'beauty',
                      'business', 'cinema', 'city', 'clothing', 'computer-hardware', 'crime', 'cultures', 'data', 'diy',
                      'drinks', 'ecommerce', 'editing', 'files', 'finance', 'folders', 'food', 'gaming', 'hands', 'healthcare',
                      'holidays', 'household', 'industry', 'maps', 'media-controls', 'messaging', 'military', 'mobile',
                      'music', 'nature', 'network', 'photo-video', 'plants', 'printing', 'profile', 'programming', 'science',
                      'security', 'shopping', 'social-networks', 'sports', 'time-and-date', 'transport', 'travel', 'user-interface',
                      'users', 'weather', 'flags', 'emoji', 'men', 'women']
        return categories.index(category)

    def get_label(self, idx=0, entry=None):
        # if entry is None:
        #     entry = self.df.iloc[idx]

        # if "uni" in self.df.columns:  # Font dataset
        #     label = self._uni_to_label(entry.uni)
        #     return torch.tensor(label)
        # elif "category" in self.df.columns:  # Icons dataset
        #     label = self._category_to_label(entry.category)
        #     return torch.tensor(label)

        if "label" in self.df.columns:
            return self.df.iloc[idx]['label']

    def idx_to_id(self, idx):
        return self.df.iloc[idx].id

    def entry_from_id(self, id):
        return self.df[self.df.id == str(id)].iloc[0]

    def _load_svg(self, icon_id):
        svg = SVG.load_svg(os.path.join(self.data_dir, f"{icon_id}.svg"))

        if not self.already_preprocessed:
            svg.fill_(False)
            svg.normalize().zoom(0.9)
            svg.canonicalize()
            svg = svg.simplify_heuristic()

        return svg

    def __len__(self):
        return len(self.df) * self.nb_augmentations

    def random_icon(self):
        return self[random.randrange(0, len(self))]

    def random_id(self):
        idx = random.randrange(0, len(self)) % len(self.df)
        return self.idx_to_id(idx)

    def random_id_by_uni(self, uni):
        df = self.df[self.df.uni == uni]
        return df.id.sample().iloc[0]

    def __getitem__(self, idx):
        return self.get(idx, self.model_args)

    @staticmethod
    def _augment(svg, mean=False):
        # aug 2
        # dx = random.randint(0, 10)
        # dy = random.randint(0, 10)
        # factor = 0.02 * dx + 0.8

        # return svg.zoom(factor).translate(Point(dx / 6, dy / 6)).rotate(Angle((dx - 5) / 2))

        # aug 1
        n = random.randint(0, 9)  # [0, 9]; note: the original `random.random() % 10` always stays in [0, 1)
        dx, dy = (0, 0) if mean else (n / 9, n / 9)
        factor = 0.7 if mean else 0.02 * n + 0.8

        return svg.zoom(factor).translate(Point(dx, dy))
        # return svg.zoom(factor)

    @staticmethod
    def simplify(svg, normalize=True):
        svg.canonicalize(normalize=normalize)
        svg = svg.simplify_heuristic()
        return svg.normalize()

    @staticmethod
    def preprocess(svg, augment=True, numericalize=True, mean=False):
        if augment:
            svg = SVGDataset._augment(svg, mean=mean)
        if numericalize:
            return svg.numericalize(256)
        return svg

    def get(self, idx=0, model_args=None, random_aug=True, id=None, svg: SVG=None):
        if id is None:
            idx = idx % len(self.df)
            id = self.idx_to_id(idx)
        # utils.set_value('id', id)

        if svg is None:
            svg = self._load_svg(id)

        svg = SVGDataset.preprocess(svg, augment=random_aug, numericalize=False)

        t_sep, fillings = svg.to_tensor(concat_groups=False, PAD_VAL=self.PAD_VAL), svg.to_fillings()

        label = self.get_label(idx)

        return self.get_data(t_sep, fillings, model_args=model_args, label=label)

    def get_data(self, t_sep, fillings, model_args=None, label=None):
        res = {}

        if model_args is None:
            model_args = self.model_args

        pad_len = max(self.MAX_NUM_GROUPS - len(t_sep), 0)

        t_sep.extend([torch.empty(0, 9)] * pad_len)
        # t_sep.extend([torch.empty(0, 14)] * pad_len)
        fillings.extend([0] * pad_len)

        t_grouped = [SVGTensor.from_data(torch.cat(t_sep, dim=0), PAD_VAL=self.PAD_VAL).add_eos().add_sos().pad(
            seq_len=self.MAX_TOTAL_LEN + 2)]
        t_sep = [SVGTensor.from_data(t, PAD_VAL=self.PAD_VAL, filling=f).add_eos().add_sos().pad(seq_len=self.MAX_SEQ_LEN + 2) for
                 t, f in zip(t_sep, fillings)]

        for arg in set(model_args):
            if "_grouped" in arg:
                arg_ = arg.split("_grouped")[0]
                t_list = t_grouped
            else:
                arg_ = arg
                t_list = t_sep

            if arg_ == "tensor":
                res[arg] = t_list

            if arg_ == "commands":
                res[arg] = torch.stack([t.cmds() for t in t_list])

            if arg_ == "args_rel":
                res[arg] = torch.stack([t.get_relative_args() for t in t_list])
            if arg_ == "args_bin":
                res[arg] = torch.stack([t.get_binary_args() for t in t_list])
            if arg_ == "args":
                res[arg] = torch.stack([t.args() for t in t_list])

        if "filling" in model_args:
            res["filling"] = torch.stack([torch.tensor(t.filling) for t in t_sep]).unsqueeze(-1)

        if "label" in model_args:
            res["label"] = label

        return res


def load_dataset(cfg: _Config, already_preprocessed=True, train_split=False):

    df = pd.read_csv(cfg.meta_filepath)

    if len(df) > 0:
        if cfg.filter_uni is not None:
            df = df[df.uni.isin(cfg.filter_uni)]

        if cfg.filter_platform is not None:
            df = df[df.platform.isin(cfg.filter_platform)]

        if cfg.filter_category is not None:
            df = df[df.category.isin(cfg.filter_category)]

        df = df[(df.nb_groups <= cfg.max_num_groups) & (df.max_len_group <= cfg.max_seq_len)]
        if cfg.max_total_len is not None:
            df = df[df.total_len <= cfg.max_total_len]

    df = df.sample(frac=cfg.dataset_ratio) if cfg.dataset_ratio < 1.0 else df

    train_df, valid_df = train_test_split(df, train_size=cfg.train_ratio)
    if train_split:
        train_df, valid_df = train_test_split(train_df, train_size=cfg.train_ratio)

    train_dataset = SVGDataset(train_df, cfg.data_dir, cfg.model_args, cfg.max_num_groups, cfg.max_seq_len, cfg.max_total_len, nb_augmentations=cfg.nb_augmentations, already_preprocessed=already_preprocessed)
    valid_dataset = SVGDataset(valid_df, cfg.data_dir, cfg.model_args, cfg.max_num_groups, cfg.max_seq_len, cfg.max_total_len, nb_augmentations=cfg.nb_augmentations, already_preprocessed=already_preprocessed)

    print(f"Number of train SVGs: {len(train_df)}")
    # print(f"First SVG in train: {train_df.iloc[0]['id']} - {train_df.iloc[0]['category']} - {train_df.iloc[0]['subcategory']}")
    print(f"First SVG in train: {train_df.iloc[0]['id']}")
    print(f"Number of valid SVGs: {len(valid_df)}")
    # print(f"First SVG in valid: {valid_df.iloc[0]['id']} - {valid_df.iloc[0]['category']} - {valid_df.iloc[0]['subcategory']}")
    print(f"First SVG in valid: {valid_df.iloc[0]['id']}")

    return train_dataset, valid_dataset
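
As a usage sketch, load_dataset reads several fields off the config beyond the ones _Config initializes; the values below are placeholders that only illustrate the expected types:

from deepsvg.config import _Config

cfg = _Config()
cfg.filter_uni = cfg.filter_platform = cfg.filter_category = None  # assumed extra fields
cfg.max_num_groups, cfg.max_seq_len, cfg.max_total_len = 8, 30, 50
cfg.dataset_ratio = 1.0
cfg.train_ratio = 0.8
cfg.model_args = ["commands", "args"]

train_dataset, valid_dataset = load_dataset(cfg)
sample = train_dataset[0]
print(sample["commands"].shape)  # [max_num_groups, max_seq_len + 2]
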
deepsvg/svglib/__init__.py
ADDED
File without changes
deepsvg/svglib/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (146 Bytes). View file
deepsvg/svglib/__pycache__/geom.cpython-310.pyc
ADDED
Binary file (18.2 kB). View file
deepsvg/svglib/__pycache__/svg.cpython-310.pyc
ADDED
Binary file (20.3 kB). View file