initial
- bvh2ts.py +69 -0
- checkpoint/UniMTS.pth +3 -0
- contrastive.py +62 -0
- data.py +321 -0
- evaluate.py +99 -0
- evaluate_custom.py +101 -0
- finetune.py +169 -0
- finetune_custom.py +172 -0
- model.py +350 -0
- pos2bvh.py +41 -0
- pretrain.py +111 -0
- run_evaluation.sh +4 -0
- run_evaluation_custom.sh +8 -0
- run_finetune.sh +19 -0
- run_finetune_custom.sh +33 -0
- run_pretrain.sh +4 -0
- text_aug.py +66 -0
- utils.py +215 -0
bvh2ts.py
ADDED
@@ -0,0 +1,69 @@
from imusim.all import *
import imusim
import numpy as np
from tqdm import tqdm
import multiprocessing
import os

with open('./bvh/000000.bvh', 'r') as file:
    lines = file.readlines()
line_109 = lines[108]
frame_time = line_109.split(': ')[1].strip()
frame_time_value = float(frame_time)
print(frame_time_value)

def process_file(f):

    imu_file_path = './output/%s.npy' % f
    if not os.path.exists(imu_file_path):

        samplingPeriod = frame_time_value
        imu = Orient3IMU()
        env = Environment()

        samples = 1000
        rotationalVelocity = 20
        calibrator = ScaleAndOffsetCalibrator(env, samples, samplingPeriod, rotationalVelocity)
        calibration = calibrator.calibrate(imu)

        try:
            model = loadBVHFile('./bvh/%s.bvh' % f)
            splinedModel = SplinedBodyModel(model)

            imu_list = []
            for i in range(22):
                sim = Simulation(environment=env)
                imu.simulation = sim

                if i not in [4,8,13,17,21]:
                    imu.trajectory = splinedModel.getJoint('joint_%s' % str(i))
                else:
                    imu.trajectory = splinedModel.getPoint('joint_%s_end' % str(i-1))

                sim.time = splinedModel.startTime
                BasicIMUBehaviour(imu, samplingPeriod, calibration, initialTime=sim.time)
                sim.run(splinedModel.endTime, printProgress=False)

                acc = imu.accelerometer.calibratedMeasurements.values
                gyro = imu.gyroscope.calibratedMeasurements.values

                imu_npy = np.concatenate((acc, gyro), axis=0)
                imu_list.append(imu_npy)

            imu_npy = np.stack(imu_list, axis=1).transpose(2,1,0)
            np.save('./output/%s' % f, imu_npy)

        except (imusim.maths.splines.Spline.InsufficientPointsError, AttributeError, IndexError) as e:
            print(f"Error processing file {f}: {e}. Skipping.")
            with open('log.txt', 'a') as log_file:
                log_file.write(f + '\n')

source_dir = './bvh'
npy_files = [file[:-4] for file in os.listdir(source_dir) if file.endswith('.bvh')]

# Process files in parallel
pool = multiprocessing.Pool(processes=8)
for _ in tqdm(pool.imap_unordered(process_file, npy_files), total=len(npy_files)):
    pass
pool.close()
pool.join()
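The script places one virtual IMU on each of the 22 skeleton joints (indices 4, 8, 13, 17 and 21 are end effectors and are read from the preceding joint's end point) and writes one array per BVH clip. As a minimal sketch of how to consume the output, assuming `./output/000000.npy` was produced above, the `transpose(2,1,0)` call yields a (T, 22, 6) layout of time steps x joints x (3-axis accelerometer + 3-axis gyroscope):

import numpy as np

imu = np.load('./output/000000.npy')   # shape (T, 22, 6)
print(imu.shape)
acc = imu[..., :3]    # per-joint accelerometer channels
gyro = imu[..., 3:]   # per-joint gyroscope channels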
checkpoint/UniMTS.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a9858c0084d936655240407e30ff9db9adeded6a67dc5650e3f667578e93b220
size 274583082
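This file is a Git LFS pointer rather than the weights themselves; the actual ~275 MB UniMTS.pth checkpoint is fetched by Git LFS (for example via `git lfs pull` after cloning) and is what the `--checkpoint` flag in the scripts below should point at.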
contrastive.py
ADDED
@@ -0,0 +1,62 @@
import torch
import torch.nn as nn
import clip
from model import ST_GCN_18

class ContrastiveModule(nn.Module):

    def __init__(self, args):
        super(ContrastiveModule, self).__init__()

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model, preprocess = clip.load("ViT-B/32", device=device)
        del model.visual
        self.model = model

        base_channel = 3
        base_channel = base_channel * 2 if args.gyro else base_channel
        base_channel = base_channel * 2 if args.stft else base_channel
        self.model.acc = ST_GCN_18(in_channels=base_channel)

        self.model = self.model.float()

        if args.stage == 'finetune':
            self.fc = nn.Linear(512, args.num_class)

    def encode_image(self, image):
        return self.model.acc(image.float()).squeeze(-1).squeeze(-1)

    def encode_text(self, text):
        x = self.model.token_embedding(text).float() # b,t,512
        x = x + self.model.positional_embedding.float()
        x = x.permute(1, 0, 2) # b,t,512 -> t,b,512
        x = self.model.transformer(x)
        x = x.permute(1, 0, 2) # t,b,512 -> b,t,512
        x = self.model.ln_final(x).float() # b,t,512

        # take features from the eot embedding (eot_token is the highest number in each sequence)
        x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.model.text_projection # b,512

        return x

    def classifier(self, image):
        # for fine-tuning
        imu_features = self.model.acc(image.float()).squeeze(-1).squeeze(-1)
        out = self.fc(imu_features)
        return out

    def forward(self, inputs_imu, inputs_text):

        imu_features = self.encode_image(inputs_imu)
        text_features = self.encode_text(inputs_text)

        # normalized features
        imu_features = imu_features / imu_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        # logits
        logit_scale = self.model.logit_scale.exp()
        logits_per_image = logit_scale * imu_features @ text_features.t()
        logits_per_text = logits_per_image.t()

        return logits_per_image, logits_per_text
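A minimal usage sketch of ContrastiveModule, assuming a CUDA device and the OpenAI `clip` package; the input shape (batch, channels, time, 22 joints, 1) mirrors the reshape done in the evaluation scripts below, with 3 channels for the accelerometer-only default:

from types import SimpleNamespace
import torch
import clip
from contrastive import ContrastiveModule

args = SimpleNamespace(gyro=0, stft=0, stage='evaluation')
model = ContrastiveModule(args).cuda()

imu = torch.randn(2, 3, 200, 22, 1).cuda()            # dummy accelerometer-only input
text = clip.tokenize(['walking', 'running']).cuda()   # candidate activity labels
logits_per_imu, logits_per_text = model(imu, text)    # (2, 2) similarity logits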
data.py
ADDED
@@ -0,0 +1,321 @@
import torch
import numpy as np
import random
import os
import json
from scipy.signal import resample
import clip
from torch.utils.data import Dataset

class CLIPDataset(Dataset):

    def __init__(self, args):

        imu_dirs = [
            f'{args.data_path}/sim/',
        ]
        text_dirs = [
            f'{args.data_path}/aug_texts/',
        ]
        self.paths = []
        for imu_dir, text_dir in zip(imu_dirs, text_dirs):
            imu_files = [f.split('.')[0] for f in os.listdir(imu_dir) if os.path.isfile(os.path.join(imu_dir, f))]
            text_files = [f.split('.')[0] for f in os.listdir(text_dir) if os.path.isfile(os.path.join(text_dir, f))]
            common_files = [f for f in imu_files if f in text_files]
            for f in common_files:
                self.paths.append((os.path.join(imu_dir, f + '.npy'), os.path.join(text_dir, f + '.txt')))

        self.args = args
        if args.sample < 1:
            self.paths = random.sample(self.paths, int(len(self.paths) * args.sample))

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):

        # load imu
        imu_path, text_path = self.paths[idx]
        imu = np.load(imu_path)
        imu[np.isnan(imu)] = 0

        # padding
        if len(imu) < self.args.padding_size:
            imu = np.pad(imu, ((0, self.args.padding_size - len(imu)), (0, 0), (0, 0)), mode='wrap')
        imu = imu[:self.args.padding_size]

        # random masking
        mask = np.zeros_like(imu)
        k = np.random.randint(1, 6) # randomly select k joints
        selected_joints = np.random.choice(22, k, replace=False)
        mask[:,selected_joints] = 1
        imu = imu.reshape(len(imu), -1)
        mask = mask.reshape(len(mask), -1)

        # load text
        with open(text_path, 'r') as file:
            lines = file.readlines()

        text = random.choice(lines).split('#')[0].strip() # remove the comment starting from "#"

        batch = {}
        batch['imu'] = imu
        batch['text'] = text
        batch['mask'] = mask

        return batch

def select_samples(data, masks, labels, k, name, data_path):
    unique_labels = torch.unique(labels)
    selected_data = []
    selected_masks = []
    selected_labels = []
    all_indices = torch.load(f'{data_path}/few_shot_data_2/{name}_k={k}.pth')

    for i, label in enumerate(unique_labels):
        selected_indices = all_indices[i]
        selected_data.append(data[selected_indices])
        selected_masks.append(masks[selected_indices])
        selected_labels.append(labels[selected_indices])

    selected_data = torch.cat(selected_data, dim=0)
    selected_masks = torch.cat(selected_masks, dim=0)
    selected_labels = torch.cat(selected_labels, dim=0)

    return selected_data, selected_masks, selected_labels

def load(dataset, padding_size, data_path, split='test', k=None):

    print(dataset)

    X = np.load(f'{data_path}/{dataset}/X_{split}.npy')
    real_labels = torch.from_numpy(np.load(f'{data_path}/{dataset}/y_{split}.npy'))
    with open(f'{data_path}/{dataset}/{dataset}.json', 'r') as file:
        data = json.load(file)
    all_X = np.zeros((X.shape[0], X.shape[1], 22, 6))

    if dataset == 'PAMAP':
        all_X[:,:,21] = np.concatenate((X[:,:,0:3], X[:,:,3:6]), axis=-1)
        all_X[:,:,11] = np.concatenate((X[:,:,18:21], X[:,:,21:24]), axis=-1)
        all_X[:,:,7] = np.concatenate((X[:,:,9:12], X[:,:,12:15]), axis=-1)
        original_sampling_rate = 100
        num_classes = 12

    elif dataset == 'USCHAD':
        all_X[:,:,5] = np.concatenate((X[:,:,0:3] * 9.80665, X[:,:,3:6] / 180 * np.pi), axis=-1)
        original_sampling_rate = 100
        num_classes = 12

    elif dataset == 'UCIHAR':
        all_X[:,:,9] = np.concatenate((X[:,:,6:9] * 9.80665, X[:,:,3:6]), axis=-1) # linear accel, gyro, total accel
        original_sampling_rate = 50
        num_classes = 6

    elif dataset == 'Opp_g':
        all_X[:,:,10] = np.concatenate((X[:,:,0:3] / 1000 * 9.8, X[:,:,3:6] / 1000), axis=-1) # convert unit from milli g to m/s^2
        all_X[:,:,19] = np.concatenate((X[:,:,9:12] / 1000 * 9.8, X[:,:,12:15] / 1000), axis=-1)
        all_X[:,:,20] = np.concatenate((X[:,:,18:21] / 1000 * 9.8, X[:,:,21:24] / 1000), axis=-1)
        all_X[:,:,15] = np.concatenate((X[:,:,27:30] / 1000 * 9.8, X[:,:,30:33] / 1000), axis=-1)
        all_X[:,:,16] = np.concatenate((X[:,:,36:39] / 1000 * 9.8, X[:,:,39:42] / 1000), axis=-1)
        original_sampling_rate = 30
        num_classes = 4 # locomotion

    elif dataset == 'WISDM':
        all_X[:,:,21] = np.concatenate((X[:,:,0:3], X[:,:,3:6]), axis=-1)
        original_sampling_rate = 20
        num_classes = 18

    elif dataset == 'DSADS':
        all_X[:,:,11] = np.concatenate((X[:,:,0:3], X[:,:,3:6]), axis=-1)
        all_X[:,:,21] = np.concatenate((X[:,:,9:12], X[:,:,12:15]), axis=-1)
        all_X[:,:,17] = np.concatenate((X[:,:,18:21], X[:,:,21:24]), axis=-1)
        all_X[:,:,6] = np.concatenate((X[:,:,27:30], X[:,:,30:33]), axis=-1)
        all_X[:,:,2] = np.concatenate((X[:,:,36:39], X[:,:,39:42]), axis=-1)
        original_sampling_rate = 25
        num_classes = 19

    elif dataset == 'Harth':
        all_X[:,:,9,:3] = X[:,:,:3] * 9.80665
        all_X[:,:,6,:3] = X[:,:,3:6] * 9.80665
        original_sampling_rate = 50
        num_classes = 12

    elif dataset == 'Wharf':
        X = -14.709 + X / 63 * (2 * 14.709)
        all_X[:,:,21,:3] = X
        original_sampling_rate = 32
        num_classes = 14

    elif dataset == 'Mhealth':
        all_X[:,:,11,:3] = X[:,:,0:3]
        all_X[:,:,3] = np.concatenate((X[:,:,6:9], X[:,:,9:12] / 180 * np.pi), axis=-1)
        all_X[:,:,21] = np.concatenate((X[:,:,15:18], X[:,:,18:21] / 180 * np.pi), axis=-1)
        original_sampling_rate = 50
        num_classes = 12

    elif dataset == 'UTD-MHAD':
        all_X[real_labels < 21,:,21,:] = np.concatenate((X[real_labels < 21,:,0:3] * 9.80665, X[real_labels < 21,:,3:6] / 180 * np.pi), axis=-1)
        all_X[real_labels >= 21,:,5,:] = np.concatenate((X[real_labels >= 21,:,0:3] * 9.80665, X[real_labels >= 21,:,3:6] / 180 * np.pi), axis=-1)
        original_sampling_rate = 50
        num_classes = 27

    elif dataset == 'MotionSense':
        all_X[:,:,5] = np.concatenate((X[:,:,:3] * 9.80665, X[:,:,3:6]), axis=-1)
        all_X[:,:,1] = np.concatenate((X[:,:,:3] * 9.80665, X[:,:,3:6]), axis=-1)
        original_sampling_rate = 50
        num_classes = 6

    elif dataset == 'w-HAR':
        all_X[:,:,7] = np.concatenate((X[:,:,:3] * 9.80665, X[:,:,3:6] / 180 * np.pi), axis=-1)
        original_sampling_rate = 250
        num_classes = 7

    elif dataset == 'Shoaib':
        all_X[:,:,1] = X[:,:,:6]
        all_X[:,:,5] = X[:,:,6:12]
        all_X[:,:,21] = X[:,:,12:18]
        all_X[:,:,20] = X[:,:,18:24]
        all_X[:,:,0] = X[:,:,24:30]
        original_sampling_rate = 50
        num_classes = 7

    elif dataset == 'har70plus':
        all_X[:,:,0,:3] = X[:,:,:3] * 9.80665
        all_X[:,:,5,:3] = X[:,:,3:6] * 9.80665
        original_sampling_rate = 50
        num_classes = 7

    elif dataset == 'MMAct':
        all_X[:,:,5] = np.concatenate((X[:,:,:3], X[:,:,3:6]), axis=-1)
        all_X[:,:,21,:3] = X[:,:,6:9]
        original_sampling_rate = 50
        num_classes = 35

    elif dataset == 'realworld':
        all_X[:,:,14] = np.concatenate((X[:,:,:3], X[:,:,3:6]), axis=-1)
        all_X[:,:,16] = np.concatenate((X[:,:,6:9], X[:,:,9:12]), axis=-1)
        all_X[:,:,13] = np.concatenate((X[:,:,12:15], X[:,:,15:18]), axis=-1)
        all_X[:,:,3] = np.concatenate((X[:,:,18:21], X[:,:,21:24]), axis=-1)
        all_X[:,:,1] = np.concatenate((X[:,:,24:27], X[:,:,27:30]), axis=-1)
        all_X[:,:,15] = np.concatenate((X[:,:,30:33], X[:,:,33:36]), axis=-1)
        all_X[:,:,9] = np.concatenate((X[:,:,36:39], X[:,:,39:42]), axis=-1)
        original_sampling_rate = 50
        num_classes = 8

    elif dataset == 'TNDA-HAR':
        all_X[:,:,20] = np.concatenate((X[:,:,:3], X[:,:,3:6]), axis=-1)
        all_X[:,:,2] = np.concatenate((X[:,:,6:9], X[:,:,9:12]), axis=-1)
        all_X[:,:,21] = np.concatenate((X[:,:,12:15], X[:,:,15:18]), axis=-1)
        all_X[:,:,3] = np.concatenate((X[:,:,18:21], X[:,:,21:24]), axis=-1)
        all_X[:,:,11] = np.concatenate((X[:,:,24:27], X[:,:,27:30]), axis=-1)
        original_sampling_rate = 50
        num_classes = 8

    elif dataset == 'ut-complex':
        all_X[:,:,5] = np.concatenate((X[:,:,:3], X[:,:,3:6]), axis=-1)
        all_X[:,:,21] = np.concatenate((X[:,:,6:9], X[:,:,9:12]), axis=-1)
        original_sampling_rate = 50
        num_classes = 13

    all_X = all_X.reshape(all_X.shape[0], all_X.shape[1], 22 * 6)

    # resample real data to 20 Hz
    new_sampling_rate = 20
    new_length = int((all_X.shape[1] / original_sampling_rate) * new_sampling_rate)
    resampled_data = np.array([resample(sequence, new_length) for sequence in all_X])

    # pad real data to args.padding_size
    masks = np.ones_like(resampled_data)
    if resampled_data.shape[1] < padding_size:
        resampled_data = np.pad(resampled_data, ((0, 0), (0, padding_size - resampled_data.shape[1]), (0, 0)), 'wrap') # N, 200, 6
        masks = np.pad(masks, ((0, 0), (0, padding_size - masks.shape[1]), (0, 0)), 'constant') # N, 200, 6
    real_inputs = torch.from_numpy(resampled_data[:,:padding_size,:]).float()
    real_masks = torch.from_numpy(masks[:,:padding_size,:]).float()

    if split == 'train' and k and k < len(real_inputs):
        real_inputs, real_masks, real_labels = select_samples(real_inputs, real_masks, real_labels, k, dataset, data_path)
    print(real_inputs.shape, real_labels.shape)

    # load text
    label_dictionary = data['label_dictionary']
    label_list = [' '.join(labels) for labels in label_dictionary.values()]
    all_text = clip.tokenize(label_list).cuda()

    return real_inputs, real_masks, real_labels, label_list, all_text, num_classes

def load_multiple(dataset_list, padding_size, data_path, split='test', k=None):

    real_inputs_list, real_masks_list, real_labels_list, label_list_list, all_text_list, num_classes_list = [], [], [], [], [], []
    for dataset in dataset_list:
        real_inputs, real_masks, real_labels, label_list, all_text, num_classes = load(dataset, padding_size, data_path, split, k)
        real_inputs_list.append(real_inputs)
        real_masks_list.append(real_masks)
        real_labels_list.append(real_labels)
        label_list_list.append(label_list)
        all_text_list.append(all_text)
        num_classes_list.append(num_classes)

    return real_inputs_list, real_masks_list, real_labels_list, label_list_list, all_text_list, num_classes_list

def load_custom_data(X_path, y_path, config_path, joint_list, original_sampling_rate, padding_size=200, split='test', k=None, few_shot_path=None):

    X = np.load(X_path)
    real_labels = torch.from_numpy(np.load(y_path))
    with open(config_path, 'r') as file:
        data = json.load(file)
    all_X = np.zeros((X.shape[0], X.shape[1], 22, 6))

    for i, joint in enumerate(joint_list):
        all_X[:,:,joint] = np.concatenate((X[:,:,6*i:6*i+3], X[:,:,6*i+3:6*i+6]), axis=-1)

    all_X = all_X.reshape(all_X.shape[0], all_X.shape[1], 22 * 6)

    # resample real data to 20 Hz
    new_sampling_rate = 20
    new_length = int((all_X.shape[1] / original_sampling_rate) * new_sampling_rate)
    resampled_data = np.array([resample(sequence, new_length) for sequence in all_X])

    # pad real data to padding_size
    masks = np.ones_like(resampled_data)
    if resampled_data.shape[1] < padding_size:
        resampled_data = np.pad(resampled_data, ((0, 0), (0, padding_size - resampled_data.shape[1]), (0, 0)), 'wrap') # N, 200, 6
        masks = np.pad(masks, ((0, 0), (0, padding_size - masks.shape[1]), (0, 0)), 'constant') # N, 200, 6
    real_inputs = torch.from_numpy(resampled_data[:,:padding_size,:]).float()
    real_masks = torch.from_numpy(masks[:,:padding_size,:]).float()

    if split == 'train' and k and k < len(real_inputs):

        unique_labels = torch.unique(real_labels)

        if few_shot_path is None:
            print('Generating few shot indices ...')
            all_indices = []
            for i, label in enumerate(unique_labels):
                indices = torch.where(real_labels == label)[0]
                selected_indices = indices[torch.randperm(len(indices))[:k]]
                all_indices.append(selected_indices)
        else:
            print('Loading existing few shot indices ...')
            all_indices = torch.load(few_shot_path)

        selected_data = []
        selected_masks = []
        selected_labels = []
        for i, label in enumerate(unique_labels):
            selected_indices = all_indices[i]
            selected_data.append(real_inputs[selected_indices])
            selected_masks.append(real_masks[selected_indices])
            selected_labels.append(real_labels[selected_indices])
        selected_data = torch.cat(selected_data, dim=0)
        selected_masks = torch.cat(selected_masks, dim=0)
        selected_labels = torch.cat(selected_labels, dim=0)
        real_inputs, real_masks, real_labels = selected_data, selected_masks, selected_labels

    print(real_inputs.shape, real_labels.shape)

    # load text
    label_dictionary = data['label_dictionary']
    label_list = [' '.join(labels) for labels in label_dictionary.values()]
    all_text = clip.tokenize(label_list).cuda()

    return real_inputs, real_masks, real_labels, label_list, all_text
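For reference, a sketch of calling `load` directly for one benchmark, assuming `./data/UCIHAR/` contains `X_test.npy`, `y_test.npy` and `UCIHAR.json` in the layout expected above (note that `load` tokenizes the label text onto the GPU, so a CUDA device is required):

from data import load

inputs, masks, labels, label_list, all_text, num_classes = load(
    'UCIHAR', padding_size=200, data_path='./data', split='test')
print(inputs.shape)   # (N, 200, 132): 22 joints x 6 channels, resampled to 20 Hz
print(num_classes)    # 6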
evaluate.py
ADDED
@@ -0,0 +1,99 @@
import numpy as np
import torch
import argparse
import os
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import wandb
import datetime
from torch.utils.data import DataLoader, TensorDataset

from data import load, load_multiple
from utils import compute_metrics_np
from contrastive import ContrastiveModule

def main(args):
    # load real data
    dataset_list = ['Opp_g','UCIHAR','MotionSense','w-HAR','Shoaib','har70plus','realworld','TNDA-HAR','PAMAP',\
                    'USCHAD','Mhealth','Harth','ut-complex','Wharf','WISDM','DSADS','UTD-MHAD','MMAct']
    real_inputs_list, real_masks_list, real_labels_list, label_list_list, all_text_list, _ = load_multiple(dataset_list, args.padding_size, args.data_path)
    test_real_dataloader_list = []
    for real_inputs, real_masks, real_labels in zip(real_inputs_list, real_masks_list, real_labels_list):
        real_dataset = TensorDataset(real_inputs, real_masks, real_labels)
        test_real_dataloader_list.append(DataLoader(real_dataset, batch_size=args.batch_size, shuffle=False))

    date = datetime.datetime.now().strftime("%d-%m-%y_%H:%M")
    wandb.init(
        project='UniMTS',
        name=f"{args.run_tag}_{args.stage}_{date}"
    )

    model = ContrastiveModule(args).cuda()

    model.model.load_state_dict(torch.load(f'{args.checkpoint}'))

    model.eval()
    with torch.no_grad():
        for ds, real_labels, test_real_dataloader, label_list, all_text in zip(dataset_list, real_labels_list, test_real_dataloader_list, label_list_list, all_text_list):
            pred_whole, logits_whole = [], []
            for input, mask, label in test_real_dataloader:

                input = input.cuda()
                mask = mask.cuda()
                label = label.cuda()

                if not args.gyro:
                    b, t, c = input.shape
                    indices = np.array([range(i, i+3) for i in range(0, c, 6)]).flatten()
                    input = input[:,:,indices]

                b, t, c = input.shape
                if args.stft:
                    input_stft = input.permute(0,2,1).reshape(b * c, t)
                    input_stft = torch.abs(torch.stft(input_stft, n_fft=25, hop_length=28, onesided=False, center=True, return_complex=True))
                    input_stft = input_stft.reshape(b, c, input_stft.shape[-2], input_stft.shape[-1]).reshape(b, c, t).permute(0,2,1)
                    input = torch.cat((input, input_stft), dim=-1)

                input = input.reshape(b, t, 22, -1).permute(0, 3, 1, 2).unsqueeze(-1)

                logits_per_imu, logits_per_text = model(input, all_text)
                logits_whole.append(logits_per_imu)

                pred = torch.argmax(logits_per_imu, dim=-1).detach().cpu().numpy()
                pred_whole.append(pred)

            pred = np.concatenate(pred_whole)
            acc = accuracy_score(real_labels, pred)
            prec = precision_score(real_labels, pred, average='macro')
            rec = recall_score(real_labels, pred, average='macro')
            f1 = f1_score(real_labels, pred, average='macro')

            print(f"{ds} acc: {acc}, {ds} prec: {prec}, {ds} rec: {rec}, {ds} f1: {f1}")
            wandb.log({f"{ds} acc": acc, f"{ds} prec": prec, f"{ds} rec": rec, f"{ds} f1": f1})

            logits_whole = torch.cat(logits_whole)
            r_at_1, r_at_2, r_at_3, r_at_4, r_at_5, mrr_score = compute_metrics_np(logits_whole.detach().cpu().numpy(), real_labels.numpy())

            print(f"{ds} R@1: {r_at_1}, R@2: {r_at_2}, R@3: {r_at_3}, R@4: {r_at_4}, R@5: {r_at_5}, MRR: {mrr_score}")
            wandb.log({f"{ds} R@1": r_at_1, f"{ds} R@2": r_at_2, f"{ds} R@3": r_at_3, f"{ds} R@4": r_at_4, f"{ds} R@5": r_at_5, f"{ds} MRR": mrr_score})

if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Unified Pre-trained Motion Time Series Model')

    # data
    parser.add_argument('--padding_size', type=int, default=200, help='padding size (default: 200)')
    parser.add_argument('--data_path', type=str, default='./data/', help='/path/to/data/')

    # training
    parser.add_argument('--run_tag', type=str, default='exp0', help='logging tag')
    parser.add_argument('--stage', type=str, default='evaluation', help='training or evaluation stage')
    parser.add_argument('--gyro', type=int, default=0, help='using gyro or not')
    parser.add_argument('--stft', type=int, default=0, help='using stft or not')
    parser.add_argument('--batch_size', type=int, default=64, help='batch size')

    parser.add_argument('--checkpoint', type=str, default='./checkpoint/', help='/path/to/checkpoint/')

    args = parser.parse_args()

    main(args)
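run_evaluation.sh (listed above but not shown here) presumably wraps an invocation like the following; every flag used is defined in the argparse block above, and the checkpoint path assumes the LFS file has been pulled:

python evaluate.py --checkpoint ./checkpoint/UniMTS.pth --batch_size 64 --run_tag exp0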
evaluate_custom.py
ADDED
@@ -0,0 +1,101 @@
import numpy as np
import torch
import argparse
import os
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import wandb
import datetime
from torch.utils.data import DataLoader, TensorDataset

from data import load, load_multiple, load_custom_data
from utils import compute_metrics_np
from contrastive import ContrastiveModule

def main(args):
    # load real data
    real_inputs, real_masks, real_labels, label_list, all_text = load_custom_data(
        args.X_path, args.y_path, args.config_path, args.joint_list, args.original_sampling_rate, padding_size=args.padding_size, split='test'
    )
    real_dataset = TensorDataset(real_inputs, real_masks, real_labels)
    test_real_dataloader = DataLoader(real_dataset, batch_size=args.batch_size, shuffle=False)

    date = datetime.datetime.now().strftime("%d-%m-%y_%H:%M")
    wandb.init(
        project='UniMTS',
        name=f"{args.run_tag}_{args.stage}_{date}"
    )

    model = ContrastiveModule(args).cuda()

    model.model.load_state_dict(torch.load(f'{args.checkpoint}'))

    model.eval()
    with torch.no_grad():
        pred_whole, logits_whole = [], []
        for input, mask, label in test_real_dataloader:

            input = input.cuda()
            mask = mask.cuda()
            label = label.cuda()

            if not args.gyro:
                b, t, c = input.shape
                indices = np.array([range(i, i+3) for i in range(0, c, 6)]).flatten()
                input = input[:,:,indices]

            b, t, c = input.shape
            if args.stft:
                input_stft = input.permute(0,2,1).reshape(b * c, t)
                input_stft = torch.abs(torch.stft(input_stft, n_fft=25, hop_length=28, onesided=False, center=True, return_complex=True))
                input_stft = input_stft.reshape(b, c, input_stft.shape[-2], input_stft.shape[-1]).reshape(b, c, t).permute(0,2,1)
                input = torch.cat((input, input_stft), dim=-1)

            input = input.reshape(b, t, 22, -1).permute(0, 3, 1, 2).unsqueeze(-1)

            logits_per_imu, logits_per_text = model(input, all_text)
            logits_whole.append(logits_per_imu)

            pred = torch.argmax(logits_per_imu, dim=-1).detach().cpu().numpy()
            pred_whole.append(pred)

        pred = np.concatenate(pred_whole)
        acc = accuracy_score(real_labels, pred)
        prec = precision_score(real_labels, pred, average='macro')
        rec = recall_score(real_labels, pred, average='macro')
        f1 = f1_score(real_labels, pred, average='macro')

        print(f"acc: {acc}, prec: {prec}, rec: {rec}, f1: {f1}")
        wandb.log({"acc": acc, "prec": prec, "rec": rec, "f1": f1})

        logits_whole = torch.cat(logits_whole)
        r_at_1, r_at_2, r_at_3, r_at_4, r_at_5, mrr_score = compute_metrics_np(logits_whole.detach().cpu().numpy(), real_labels.numpy())

        print(f"R@1: {r_at_1}, R@2: {r_at_2}, R@3: {r_at_3}, R@4: {r_at_4}, R@5: {r_at_5}, MRR: {mrr_score}")
        wandb.log({"R@1": r_at_1, "R@2": r_at_2, "R@3": r_at_3, "R@4": r_at_4, "R@5": r_at_5, "MRR": mrr_score})

if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Unified Pre-trained Motion Time Series Model')

    # data
    parser.add_argument('--padding_size', type=int, default=200, help='padding size (default: 200)')
    parser.add_argument('--X_path', type=str, required=True, help='/path/to/data/')
    parser.add_argument('--y_path', type=str, required=True, help='/path/to/label/')
    parser.add_argument('--config_path', type=str, required=True, help='/path/to/config/')
    parser.add_argument('--joint_list', nargs='+', type=int, required=True, help='List of joint indices')
    parser.add_argument('--original_sampling_rate', type=int, required=True, help='original sampling rate')

    # training
    parser.add_argument('--run_tag', type=str, default='exp0', help='logging tag')
    parser.add_argument('--stage', type=str, default='evaluation', help='training or evaluation stage')
    parser.add_argument('--gyro', type=int, default=0, help='using gyro or not')
    parser.add_argument('--stft', type=int, default=0, help='using stft or not')
    parser.add_argument('--batch_size', type=int, default=64, help='batch size')

    parser.add_argument('--checkpoint', type=str, default='./checkpoint/', help='/path/to/checkpoint/')

    args = parser.parse_args()

    main(args)
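For a custom dataset, all data-description flags are required; `--joint_list` names the skeleton joint (0-21) each consecutive 6-channel sensor block in X maps to. The paths below are placeholders:

python evaluate_custom.py --X_path ./my_data/X_test.npy --y_path ./my_data/y_test.npy --config_path ./my_data/config.json --joint_list 21 5 --original_sampling_rate 50 --checkpoint ./checkpoint/UniMTS.pth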
finetune.py
ADDED
@@ -0,0 +1,169 @@
import numpy as np
import torch
import torch.nn.functional as F

import argparse
import os
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import wandb
import datetime
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

from data import load_multiple
from utils import compute_metrics_np
from contrastive import ContrastiveModule

def main(args):

    # load real data
    dataset_list = ['Opp_g','UCIHAR','MotionSense','w-HAR','Shoaib','har70plus','realworld','TNDA-HAR','PAMAP',\
                    'USCHAD','Mhealth','Harth','ut-complex','Wharf','WISDM','DSADS','UTD-MHAD','MMAct']
    train_inputs_list, train_masks_list, train_labels_list, label_list_list, all_text_list, num_classes_list = load_multiple(dataset_list, args.padding_size, args.data_path, split='train', k=args.k)
    test_inputs_list, test_masks_list, test_labels_list, label_list_list, all_text_list, _ = load_multiple(dataset_list, args.padding_size, args.data_path, split='test')
    train_dataloader_list, test_dataloader_list = [], []
    for real_inputs, real_masks, real_labels in zip(train_inputs_list, train_masks_list, train_labels_list):
        train_dataset = TensorDataset(real_inputs, real_masks, real_labels)
        train_dataloader_list.append(DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True))
    for real_inputs, real_masks, real_labels in zip(test_inputs_list, test_masks_list, test_labels_list):
        test_dataset = TensorDataset(real_inputs, real_masks, real_labels)
        test_dataloader_list.append(DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False))

    date = datetime.datetime.now().strftime("%d-%m-%y_%H:%M")
    wandb.init(
        project='UniMTS',
        name=f"{args.run_tag}_{args.stage}_{args.mode}_k={args.k}_{date}"
    )

    save_path = './checkpoint/%s/' % args.run_tag
    os.makedirs(save_path, exist_ok=True)  # ensure the checkpoint directory exists

    for ds, train_dataloader, test_dataloader, test_labels, label_list, all_text, num_class in \
        zip(dataset_list, train_dataloader_list, test_dataloader_list, test_labels_list, label_list_list, all_text_list, num_classes_list):

        args.num_class = num_class
        model = ContrastiveModule(args).cuda()
        optimizer = optim.Adam(model.parameters(), lr=1e-4)

        if args.mode == 'full' or args.mode == 'probe':
            model.model.load_state_dict(torch.load(f'{args.checkpoint}'))
        if args.mode == 'probe':
            for name, param in model.model.named_parameters():
                param.requires_grad = False

        best_loss = None
        for epoch in range(args.num_epochs):

            tol_loss = 0

            model.train()
            for i, (input, mask, label) in enumerate(train_dataloader):

                input = input.cuda()
                labels = label.cuda()

                if not args.gyro:
                    b, t, c = input.shape
                    indices = np.array([range(i, i+3) for i in range(0, c, 6)]).flatten()
                    input = input[:,:,indices]

                b, t, c = input.shape
                if args.stft:
                    input_stft = input.permute(0,2,1).reshape(b * c, t)
                    input_stft = torch.abs(torch.stft(input_stft, n_fft=25, hop_length=28, onesided=False, center=True, return_complex=True))
                    input_stft = input_stft.reshape(b, c, input_stft.shape[-2], input_stft.shape[-1]).reshape(b, c, t).permute(0,2,1)
                    input = torch.cat((input, input_stft), dim=-1)

                input = input.reshape(b, t, 22, -1).permute(0, 3, 1, 2).unsqueeze(-1)

                output = model.classifier(input)

                loss = F.cross_entropy(output.float(), labels.long(), reduction="mean")

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                tol_loss += len(input) * loss.item()

                # print(epoch, i, loss.item())

            print(f'Epoch [{epoch+1}/{args.num_epochs}], Loss: {tol_loss / len(train_dataloader.dataset):.4f}')
            wandb.log({f'{ds} loss': tol_loss / len(train_dataloader.dataset)})

            if best_loss is None or tol_loss < best_loss:
                best_loss = tol_loss
                torch.save(model.state_dict(), os.path.join(save_path, f'{ds}_k={args.k}_best_loss.pth'))

        # evaluation
        model.load_state_dict(torch.load(os.path.join(save_path, f'{ds}_k={args.k}_best_loss.pth')))
        model.eval()
        with torch.no_grad():

            pred_whole, logits_whole = [], []
            for input, mask, label in test_dataloader:

                input = input.cuda()
                label = label.cuda()

                if not args.gyro:
                    b, t, c = input.shape
                    indices = np.array([range(i, i+3) for i in range(0, c, 6)]).flatten()
                    input = input[:,:,indices]

                b, t, c = input.shape
                if args.stft:
                    input_stft = input.permute(0,2,1).reshape(b * c, t)
                    input_stft = torch.abs(torch.stft(input_stft, n_fft=25, hop_length=28, onesided=False, center=True, return_complex=True))
                    input_stft = input_stft.reshape(b, c, input_stft.shape[-2], input_stft.shape[-1]).reshape(b, c, t).permute(0,2,1)
                    input = torch.cat((input, input_stft), dim=-1)

                input = input.reshape(b, t, 22, -1).permute(0, 3, 1, 2).unsqueeze(-1)

                logits_per_imu = model.classifier(input)
                logits_whole.append(logits_per_imu)

                pred = torch.argmax(logits_per_imu, dim=-1).detach().cpu().numpy()
                pred_whole.append(pred)

            pred = np.concatenate(pred_whole)
            acc = accuracy_score(test_labels, pred)
            prec = precision_score(test_labels, pred, average='macro')
            rec = recall_score(test_labels, pred, average='macro')
            f1 = f1_score(test_labels, pred, average='macro')

            print(f"{ds} acc: {acc}, {ds} prec: {prec}, {ds} rec: {rec}, {ds} f1: {f1}")
            wandb.log({f"{ds} acc": acc, f"{ds} prec": prec, f"{ds} rec": rec, f"{ds} f1": f1})

            logits_whole = torch.cat(logits_whole)
            r_at_1, r_at_2, r_at_3, r_at_4, r_at_5, mrr_score = compute_metrics_np(logits_whole.detach().cpu().numpy(), test_labels.numpy())

            print(f"{ds} R@1: {r_at_1}, R@2: {r_at_2}, R@3: {r_at_3}, R@4: {r_at_4}, R@5: {r_at_5}, MRR: {mrr_score}")
            wandb.log({f"{ds} R@1": r_at_1, f"{ds} R@2": r_at_2, f"{ds} R@3": r_at_3, f"{ds} R@4": r_at_4, f"{ds} R@5": r_at_5, f"{ds} MRR": mrr_score})


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Unified Pre-trained Motion Time Series Model')

    # model
    parser.add_argument('--mode', type=str, default='full', choices=['random','probe','full'], help='full fine-tuning, linear probe, random init')

    # data
    parser.add_argument('--padding_size', type=int, default=200, help='padding size (default: 200)')
    parser.add_argument('--k', type=int, help='few shot samples per class (default: None)')
    parser.add_argument('--data_path', type=str, default='./data/', help='/path/to/data/')

    # training
    parser.add_argument('--stage', type=str, default='finetune', help='training stage')
    parser.add_argument('--num_epochs', type=int, default=200, help='number of fine-tuning epochs (default: 200)')
    parser.add_argument('--run_tag', type=str, default='exp0', help='logging tag')
    parser.add_argument('--gyro', type=int, default=0, help='using gyro or not')
    parser.add_argument('--stft', type=int, default=0, help='using stft or not')
    parser.add_argument('--batch_size', type=int, default=64, help='batch size')

    parser.add_argument('--checkpoint', type=str, default='./checkpoint/', help='/path/to/checkpoint/')

    args = parser.parse_args()

    main(args)
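The three --mode settings map directly onto the logic above: 'full' loads the pre-trained encoder and updates all parameters, 'probe' loads it but freezes everything under model.model so only the linear head trains, and 'random' skips the checkpoint for a randomly initialized baseline. For example:

python finetune.py --mode probe --k 5 --checkpoint ./checkpoint/UniMTS.pth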
finetune_custom.py
ADDED
@@ -0,0 +1,172 @@
import numpy as np
import torch
import torch.nn.functional as F

import argparse
import os
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import wandb
import datetime
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

from data import load_multiple, load_custom_data
from utils import compute_metrics_np
from contrastive import ContrastiveModule

def main(args):

    train_inputs, train_masks, train_labels, _, _ = load_custom_data(
        args.X_train_path, args.y_train_path, args.config_path, args.joint_list, args.original_sampling_rate, padding_size=args.padding_size, split='train', k=args.k, few_shot_path=args.few_shot_path
    )
    train_dataset = TensorDataset(train_inputs, train_masks, train_labels)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)

    test_inputs, test_masks, test_labels, _, _ = load_custom_data(
        args.X_test_path, args.y_test_path, args.config_path, args.joint_list, args.original_sampling_rate, padding_size=args.padding_size, split='test'
    )
    test_dataset = TensorDataset(test_inputs, test_masks, test_labels)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

    date = datetime.datetime.now().strftime("%d-%m-%y_%H:%M")
    wandb.init(
        project='UniMTS',
        name=f"{args.run_tag}_{args.stage}_{args.mode}_k={args.k}_{date}"
    )

    save_path = './checkpoint/%s/' % args.run_tag
    os.makedirs(save_path, exist_ok=True)  # ensure the checkpoint directory exists

    model = ContrastiveModule(args).cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    if args.mode == 'full' or args.mode == 'probe':
        model.model.load_state_dict(torch.load(f'{args.checkpoint}'))
    if args.mode == 'probe':
        for name, param in model.model.named_parameters():
            param.requires_grad = False

    best_loss = None
    for epoch in range(args.num_epochs):

        tol_loss = 0

        model.train()
        for i, (input, mask, label) in enumerate(train_dataloader):

            input = input.cuda()
            labels = label.cuda()

            if not args.gyro:
                b, t, c = input.shape
                indices = np.array([range(i, i+3) for i in range(0, c, 6)]).flatten()
                input = input[:,:,indices]

            b, t, c = input.shape
            if args.stft:
                input_stft = input.permute(0,2,1).reshape(b * c, t)
                input_stft = torch.abs(torch.stft(input_stft, n_fft=25, hop_length=28, onesided=False, center=True, return_complex=True))
                input_stft = input_stft.reshape(b, c, input_stft.shape[-2], input_stft.shape[-1]).reshape(b, c, t).permute(0,2,1)
                input = torch.cat((input, input_stft), dim=-1)

            input = input.reshape(b, t, 22, -1).permute(0, 3, 1, 2).unsqueeze(-1)

            output = model.classifier(input)

            loss = F.cross_entropy(output.float(), labels.long(), reduction="mean")

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            tol_loss += len(input) * loss.item()

            # print(epoch, i, loss.item())

        print(f'Epoch [{epoch+1}/{args.num_epochs}], Loss: {tol_loss / len(train_dataset):.4f}')
        wandb.log({'loss': tol_loss / len(train_dataset)})

        if best_loss is None or tol_loss < best_loss:
            best_loss = tol_loss
            torch.save(model.state_dict(), os.path.join(save_path, f'k={args.k}_best_loss.pth'))

    # evaluation
    model.load_state_dict(torch.load(os.path.join(save_path, f'k={args.k}_best_loss.pth')))
    model.eval()
    with torch.no_grad():

        pred_whole, logits_whole = [], []
        for input, mask, label in test_dataloader:

            input = input.cuda()
            label = label.cuda()

            if not args.gyro:
                b, t, c = input.shape
                indices = np.array([range(i, i+3) for i in range(0, c, 6)]).flatten()
                input = input[:,:,indices]

            b, t, c = input.shape
            if args.stft:
                input_stft = input.permute(0,2,1).reshape(b * c, t)
                input_stft = torch.abs(torch.stft(input_stft, n_fft=25, hop_length=28, onesided=False, center=True, return_complex=True))
                input_stft = input_stft.reshape(b, c, input_stft.shape[-2], input_stft.shape[-1]).reshape(b, c, t).permute(0,2,1)
                input = torch.cat((input, input_stft), dim=-1)

            input = input.reshape(b, t, 22, -1).permute(0, 3, 1, 2).unsqueeze(-1)

            logits_per_imu = model.classifier(input)
            logits_whole.append(logits_per_imu)

            pred = torch.argmax(logits_per_imu, dim=-1).detach().cpu().numpy()
            pred_whole.append(pred)

        pred = np.concatenate(pred_whole)
        acc = accuracy_score(test_labels, pred)
        prec = precision_score(test_labels, pred, average='macro')
        rec = recall_score(test_labels, pred, average='macro')
        f1 = f1_score(test_labels, pred, average='macro')

        print(f"acc: {acc}, prec: {prec}, rec: {rec}, f1: {f1}")
        wandb.log({"acc": acc, "prec": prec, "rec": rec, "f1": f1})

        logits_whole = torch.cat(logits_whole)
        r_at_1, r_at_2, r_at_3, r_at_4, r_at_5, mrr_score = compute_metrics_np(logits_whole.detach().cpu().numpy(), test_labels.numpy())

        print(f"R@1: {r_at_1}, R@2: {r_at_2}, R@3: {r_at_3}, R@4: {r_at_4}, R@5: {r_at_5}, MRR: {mrr_score}")
        wandb.log({"R@1": r_at_1, "R@2": r_at_2, "R@3": r_at_3, "R@4": r_at_4, "R@5": r_at_5, "MRR": mrr_score})


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Unified Pre-trained Motion Time Series Model')

    # model
    parser.add_argument('--mode', type=str, default='full', choices=['random','probe','full'], help='full fine-tuning, linear probe, random init')

    # data
    parser.add_argument('--padding_size', type=int, default=200, help='padding size (default: 200)')
    parser.add_argument('--k', type=int, help='few shot samples per class (default: None)')
    parser.add_argument('--X_train_path', type=str, required=True, help='/path/to/train/data/')
    parser.add_argument('--X_test_path', type=str, required=True, help='/path/to/test/data/')
    parser.add_argument('--y_train_path', type=str, required=True, help='/path/to/train/label/')
    parser.add_argument('--y_test_path', type=str, required=True, help='/path/to/test/label/')
    parser.add_argument('--config_path', type=str, required=True, help='/path/to/config/')
    parser.add_argument('--few_shot_path', type=str, help='/path/to/few/shot/indices/')
    parser.add_argument('--joint_list', nargs='+', type=int, required=True, help='List of joint indices')
    parser.add_argument('--original_sampling_rate', type=int, required=True, help='original sampling rate')
    parser.add_argument('--num_class', type=int, required=True, help='number of classes')

    # training
    parser.add_argument('--stage', type=str, default='finetune', help='training stage')
    parser.add_argument('--num_epochs', type=int, default=200, help='number of fine-tuning epochs (default: 200)')
    parser.add_argument('--run_tag', type=str, default='exp0', help='logging tag')
    parser.add_argument('--gyro', type=int, default=0, help='using gyro or not')
    parser.add_argument('--stft', type=int, default=0, help='using stft or not')
    parser.add_argument('--batch_size', type=int, default=64, help='batch size')

    parser.add_argument('--checkpoint', type=str, default='./checkpoint/', help='/path/to/checkpoint/')

    args = parser.parse_args()

    main(args)
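Fine-tuning on a custom dataset mirrors the custom evaluation call, with train/test splits and the class count supplied explicitly; all paths below are placeholders:

python finetune_custom.py --mode full --k 5 --X_train_path ./my_data/X_train.npy --y_train_path ./my_data/y_train.npy --X_test_path ./my_data/X_test.npy --y_test_path ./my_data/y_test.npy --config_path ./my_data/config.json --joint_list 21 5 --original_sampling_rate 50 --num_class 13 --checkpoint ./checkpoint/UniMTS.pth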
model.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
class Graph():
|
| 7 |
+
""" The Graph to model the skeletons
|
| 8 |
+
|
| 9 |
+
Args:
|
| 10 |
+
strategy (string): must be one of the follow candidates
|
| 11 |
+
- uniform: Uniform Labeling
|
| 12 |
+
- distance: Distance Partitioning
|
| 13 |
+
- spatial: Spatial Configuration
|
| 14 |
+
max_hop (int): the maximal distance between two connected nodes
|
| 15 |
+
dilation (int): controls the spacing between the kernel points
|
| 16 |
+
|
| 17 |
+
"""
|
| 18 |
+
def __init__(self,
|
| 19 |
+
strategy='spatial',
|
| 20 |
+
max_hop=1,
|
| 21 |
+
dilation=1):
|
| 22 |
+
self.max_hop = max_hop
|
| 23 |
+
self.dilation = dilation
|
| 24 |
+
|
| 25 |
+
self.get_edge()
|
| 26 |
+
self.hop_dis = get_hop_distance(self.num_node,
|
| 27 |
+
self.edge,
|
| 28 |
+
max_hop=max_hop)
|
| 29 |
+
self.get_adjacency(strategy)
|
| 30 |
+
|
| 31 |
+
def __str__(self):
|
| 32 |
+
return self.A
|
| 33 |
+
|
| 34 |
+
def get_edge(self):
|
| 35 |
+
# edge is a list of [child, parent] paris
|
| 36 |
+
self.num_node = 22
|
| 37 |
+
self_link = [(i, i) for i in range(self.num_node)]
|
| 38 |
+
neighbor_link = [(1,0), (2,1), (3,2), (4,3), (5,0), (6,5), (7,6), (8,7), (9,0), (10,9), (11,10), (12,11), \
|
| 39 |
+
(13,12), (14,11), (15,14), (16,15), (17,16), (18,11), (19,18), (20,19), (21,20)]
|
| 40 |
+
self.edge = self_link + neighbor_link
|
| 41 |
+
self.center = 0
|
| 42 |
+
|
| 43 |
+
def get_adjacency(self, strategy):
|
| 44 |
+
valid_hop = range(0, self.max_hop + 1, self.dilation)
|
| 45 |
+
adjacency = np.zeros((self.num_node, self.num_node))
|
| 46 |
+
for hop in valid_hop:
|
| 47 |
+
adjacency[self.hop_dis == hop] = 1
|
| 48 |
+
normalize_adjacency = normalize_digraph(adjacency)
|
| 49 |
+
|
| 50 |
+
if strategy == 'uniform':
|
| 51 |
+
A = np.zeros((1, self.num_node, self.num_node))
|
| 52 |
+
A[0] = normalize_adjacency
|
| 53 |
+
self.A = A
|
| 54 |
+
elif strategy == 'distance':
|
| 55 |
+
A = np.zeros((len(valid_hop), self.num_node, self.num_node))
|
| 56 |
+
for i, hop in enumerate(valid_hop):
|
| 57 |
+
A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis ==
|
| 58 |
+
hop]
|
| 59 |
+
self.A = A
|
| 60 |
+
elif strategy == 'spatial':
|
| 61 |
+
A = []
|
| 62 |
+
for hop in valid_hop:
|
| 63 |
+
a_root = np.zeros((self.num_node, self.num_node))
|
| 64 |
+
a_close = np.zeros((self.num_node, self.num_node))
|
| 65 |
+
a_further = np.zeros((self.num_node, self.num_node))
|
| 66 |
+
for i in range(self.num_node):
|
| 67 |
+
for j in range(self.num_node):
|
| 68 |
+
if self.hop_dis[j, i] == hop:
|
| 69 |
+
if self.hop_dis[j, self.center] == self.hop_dis[
|
| 70 |
+
i, self.center]:
|
| 71 |
+
a_root[j, i] = normalize_adjacency[j, i]
|
| 72 |
+
elif self.hop_dis[j, self.center] > self.hop_dis[
|
| 73 |
+
i, self.center]:
|
| 74 |
+
a_close[j, i] = normalize_adjacency[j, i]
|
| 75 |
+
else:
|
| 76 |
+
a_further[j, i] = normalize_adjacency[j, i]
|
| 77 |
+
if hop == 0:
|
| 78 |
+
A.append(a_root)
|
| 79 |
+
else:
|
| 80 |
+
A.append(a_root + a_close)
|
| 81 |
+
A.append(a_further)
|
| 82 |
+
A = np.stack(A)
|
| 83 |
+
self.A = A
|
| 84 |
+
else:
|
| 85 |
+
raise ValueError("Do Not Exist This Strategy")
|
| 86 |
+
|
| 87 |
+
def get_hop_distance(num_node, edge, max_hop=1):
|
| 88 |
+
A = np.zeros((num_node, num_node))
|
| 89 |
+
for i, j in edge:
|
| 90 |
+
A[j, i] = 1
|
| 91 |
+
A[i, j] = 1
|
| 92 |
+
|
| 93 |
+
# compute hop steps
|
| 94 |
+
hop_dis = np.zeros((num_node, num_node)) + np.inf
|
| 95 |
+
transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
|
| 96 |
+
arrive_mat = (np.stack(transfer_mat) > 0)
|
| 97 |
+
for d in range(max_hop, -1, -1):
|
| 98 |
+
hop_dis[arrive_mat[d]] = d
|
| 99 |
+
return hop_dis
|
| 100 |
+
|
| 101 |
+
def normalize_digraph(A):
|
| 102 |
+
Dl = np.sum(A, 0)
|
| 103 |
+
num_node = A.shape[0]
|
| 104 |
+
Dn = np.zeros((num_node, num_node))
|
| 105 |
+
for i in range(num_node):
|
| 106 |
+
if Dl[i] > 0:
|
| 107 |
+
Dn[i, i] = Dl[i]**(-1)
|
| 108 |
+
AD = np.dot(A, Dn)
|
| 109 |
+
return AD
|
| 110 |
+
|
| 111 |
+
def normalize_undigraph(A):
|
| 112 |
+
Dl = np.sum(A, 0)
|
| 113 |
+
num_node = A.shape[0]
|
| 114 |
+
Dn = np.zeros((num_node, num_node))
|
| 115 |
+
for i in range(num_node):
|
| 116 |
+
if Dl[i] > 0:
|
| 117 |
+
Dn[i, i] = Dl[i]**(-0.5)
|
| 118 |
+
DAD = np.dot(np.dot(Dn, A), Dn)
|
| 119 |
+
return DAD
|
| 120 |
+
|
| 121 |
+
def zero(x):
|
| 122 |
+
return 0
|
| 123 |
+
|
| 124 |
+
def iden(x):
|
| 125 |
+
return x
|
| 126 |
+
|
| 127 |
+
class ConvTemporalGraphical(nn.Module):
|
| 128 |
+
r"""The basic module for applying a graph convolution.
|
| 129 |
+
|
| 130 |
+
Args:
|
| 131 |
+
in_channels (int): Number of channels in the input sequence data
|
| 132 |
+
out_channels (int): Number of channels produced by the convolution
|
| 133 |
+
kernel_size (int): Size of the graph convolving kernel
|
| 134 |
+
t_kernel_size (int): Size of the temporal convolving kernel
|
| 135 |
+
t_stride (int, optional): Stride of the temporal convolution. Default: 1
|
| 136 |
+
t_padding (int, optional): Temporal zero-padding added to both sides of
|
| 137 |
+
the input. Default: 0
|
| 138 |
+
t_dilation (int, optional): Spacing between temporal kernel elements.
|
| 139 |
+
Default: 1
|
| 140 |
+
bias (bool, optional): If ``True``, adds a learnable bias to the output.
|
| 141 |
+
Default: ``True``
|
| 142 |
+
|
| 143 |
+
Shape:
|
| 144 |
+
- Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
|
| 145 |
+
- Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
|
| 146 |
+
- Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format
|
| 147 |
+
- Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format
|
| 148 |
+
|
| 149 |
+
where
|
| 150 |
+
:math:`N` is a batch size,
|
| 151 |
+
:math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
|
| 152 |
+
:math:`T_{in}/T_{out}` is a length of input/output sequence,
|
| 153 |
+
:math:`V` is the number of graph nodes.
|
| 154 |
+
"""
|
| 155 |
+
def __init__(self,
|
| 156 |
+
in_channels,
|
| 157 |
+
out_channels,
|
| 158 |
+
kernel_size,
|
| 159 |
+
t_kernel_size=1,
|
| 160 |
+
t_stride=1,
|
| 161 |
+
t_padding=0,
|
| 162 |
+
t_dilation=1,
|
| 163 |
+
bias=True):
|
| 164 |
+
super().__init__()
|
| 165 |
+
|
| 166 |
+
self.kernel_size = kernel_size
|
| 167 |
+
self.conv = nn.Conv2d(in_channels,
|
| 168 |
+
out_channels * kernel_size,
|
| 169 |
+
kernel_size=(t_kernel_size, 1),
|
| 170 |
+
padding=(t_padding, 0),
|
| 171 |
+
stride=(t_stride, 1),
|
| 172 |
+
dilation=(t_dilation, 1),
|
| 173 |
+
bias=bias)
|
| 174 |
+
|
| 175 |
+
def forward(self, x, A):
|
| 176 |
+
assert A.size(0) == self.kernel_size
|
| 177 |
+
|
| 178 |
+
x = self.conv(x)
|
| 179 |
+
|
| 180 |
+
n, kc, t, v = x.size()
|
| 181 |
+
x = x.view(n, self.kernel_size, kc // self.kernel_size, t, v)
|
| 182 |
+
x = torch.einsum('nkctv,kvw->nctw', (x, A))
|
| 183 |
+
|
| 184 |
+
return x.contiguous(), A
|
| 185 |
+
|
| 186 |
+
class st_gcn_block(nn.Module):
|
| 187 |
+
r"""Applies a spatial temporal graph convolution over an input graph sequence.
|
| 188 |
+
|
| 189 |
+
Args:
|
| 190 |
+
in_channels (int): Number of channels in the input sequence data
|
| 191 |
+
out_channels (int): Number of channels produced by the convolution
|
| 192 |
+
kernel_size (tuple): Size of the temporal convolving kernel and graph convolving kernel
|
| 193 |
+
stride (int, optional): Stride of the temporal convolution. Default: 1
|
| 194 |
+
dropout (int, optional): Dropout rate of the final output. Default: 0
|
| 195 |
+
residual (bool, optional): If ``True``, applies a residual mechanism. Default: ``True``
|
| 196 |
+
|
| 197 |
+
Shape:
|
| 198 |
+
- Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
|
| 199 |
+
- Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
|
| 200 |
+
- Output[0]: Outpu graph sequence in :math:`(N, out_channels, T_{out}, V)` format
|
| 201 |
+
- Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format
|
| 202 |
+
|
| 203 |
+
where
|
| 204 |
+
:math:`N` is a batch size,
|
| 205 |
+
:math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
|
| 206 |
+
:math:`T_{in}/T_{out}` is a length of input/output sequence,
|
| 207 |
+
:math:`V` is the number of graph nodes.
|
| 208 |
+
|
| 209 |
+
"""
|
| 210 |
+
def __init__(self,
|
| 211 |
+
in_channels,
|
| 212 |
+
out_channels,
|
| 213 |
+
kernel_size,
|
| 214 |
+
stride=1,
|
| 215 |
+
dropout=0,
|
| 216 |
+
residual=True):
|
| 217 |
+
super().__init__()
|
| 218 |
+
|
| 219 |
+
assert len(kernel_size) == 2
|
| 220 |
+
assert kernel_size[0] % 2 == 1
|
| 221 |
+
padding = ((kernel_size[0] - 1) // 2, 0)
|
| 222 |
+
|
| 223 |
+
self.gcn = ConvTemporalGraphical(in_channels, out_channels,
|
| 224 |
+
kernel_size[1])
|
| 225 |
+
|
| 226 |
+
self.tcn = nn.Sequential(
|
| 227 |
+
nn.BatchNorm2d(out_channels),
|
| 228 |
+
nn.ReLU(inplace=True),
|
| 229 |
+
nn.Conv2d(
|
| 230 |
+
out_channels,
|
| 231 |
+
out_channels,
|
| 232 |
+
(kernel_size[0], 1),
|
| 233 |
+
(stride, 1),
|
| 234 |
+
padding,
|
| 235 |
+
),
|
| 236 |
+
nn.BatchNorm2d(out_channels),
|
| 237 |
+
nn.Dropout(dropout, inplace=True),
|
| 238 |
+
)
|
| 239 |
+
|
| 240 |
+
if not residual:
|
| 241 |
+
self.residual = zero
|
| 242 |
+
|
| 243 |
+
elif (in_channels == out_channels) and (stride == 1):
|
| 244 |
+
self.residual = iden
|
| 245 |
+
|
| 246 |
+
else:
|
| 247 |
+
self.residual = nn.Sequential(
|
| 248 |
+
nn.Conv2d(in_channels,
|
| 249 |
+
out_channels,
|
| 250 |
+
kernel_size=1,
|
| 251 |
+
stride=(stride, 1)),
|
| 252 |
+
nn.BatchNorm2d(out_channels),
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
self.relu = nn.ReLU(inplace=True)
|
| 256 |
+
|
| 257 |
+
def forward(self, x, A):
|
| 258 |
+
|
| 259 |
+
res = self.residual(x)
|
| 260 |
+
x, A = self.gcn(x, A)
|
| 261 |
+
x = self.tcn(x) + res
|
| 262 |
+
|
| 263 |
+
return self.relu(x), A
|
| 264 |
+
|
| 265 |
+
class ST_GCN_18(nn.Module):
|
| 266 |
+
r"""Spatial temporal graph convolutional networks.
|
| 267 |
+
|
| 268 |
+
Args:
|
| 269 |
+
in_channels (int): Number of channels in the input data
|
| 270 |
+
num_class (int): Number of classes for the classification task
|
| 271 |
+
graph_cfg (dict): The arguments for building the graph
|
| 272 |
+
edge_importance_weighting (bool): If ``True``, adds a learnable
|
| 273 |
+
importance weighting to the edges of the graph
|
| 274 |
+
**kwargs (optional): Other parameters for graph convolution units
|
| 275 |
+
|
| 276 |
+
Shape:
|
| 277 |
+
- Input: :math:`(N, in_channels, T_{in}, V_{in}, M_{in})`
|
| 278 |
+
- Output: :math:`(N, num_class)` where
|
| 279 |
+
:math:`N` is a batch size,
|
| 280 |
+
:math:`T_{in}` is a length of input sequence,
|
| 281 |
+
:math:`V_{in}` is the number of graph nodes,
|
| 282 |
+
:math:`M_{in}` is the number of instance in a frame.
|
| 283 |
+
"""
|
| 284 |
+
def __init__(self,
|
| 285 |
+
in_channels,
|
| 286 |
+
edge_importance_weighting=True,
|
| 287 |
+
data_bn=True,
|
| 288 |
+
**kwargs):
|
| 289 |
+
super().__init__()
|
| 290 |
+
|
| 291 |
+
# load graph
|
| 292 |
+
self.graph = Graph()
|
| 293 |
+
A = torch.tensor(self.graph.A,
|
| 294 |
+
dtype=torch.float32,
|
| 295 |
+
requires_grad=False)
|
| 296 |
+
self.register_buffer('A', A)
|
| 297 |
+
|
| 298 |
+
# build networks
|
| 299 |
+
spatial_kernel_size = A.size(0)
|
| 300 |
+
temporal_kernel_size = 9
|
| 301 |
+
kernel_size = (temporal_kernel_size, spatial_kernel_size)
|
| 302 |
+
self.data_bn = nn.BatchNorm1d(in_channels *
|
| 303 |
+
A.size(1)) if data_bn else iden
|
| 304 |
+
kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}
|
| 305 |
+
self.st_gcn_networks = nn.ModuleList((
|
| 306 |
+
st_gcn_block(in_channels,
|
| 307 |
+
64,
|
| 308 |
+
kernel_size,
|
| 309 |
+
1,
|
| 310 |
+
residual=False,
|
| 311 |
+
**kwargs0),
|
| 312 |
+
st_gcn_block(64, 64, kernel_size, 1, **kwargs),
|
| 313 |
+
st_gcn_block(64, 64, kernel_size, 1, **kwargs),
|
| 314 |
+
st_gcn_block(64, 64, kernel_size, 1, **kwargs),
|
| 315 |
+
st_gcn_block(64, 128, kernel_size, 2, **kwargs),
|
| 316 |
+
st_gcn_block(128, 128, kernel_size, 1, **kwargs),
|
| 317 |
+
st_gcn_block(128, 128, kernel_size, 1, **kwargs),
|
| 318 |
+
st_gcn_block(128, 256, kernel_size, 2, **kwargs),
|
| 319 |
+
st_gcn_block(256, 256, kernel_size, 1, **kwargs),
|
| 320 |
+
st_gcn_block(256, 512, kernel_size, 1, **kwargs),
|
| 321 |
+
))
|
| 322 |
+
|
| 323 |
+
# initialize parameters for edge importance weighting
|
| 324 |
+
if edge_importance_weighting:
|
| 325 |
+
self.edge_importance = nn.ParameterList([
|
| 326 |
+
nn.Parameter(torch.ones(self.A.size()))
|
| 327 |
+
for i in self.st_gcn_networks
|
| 328 |
+
])
|
| 329 |
+
else:
|
| 330 |
+
self.edge_importance = [1] * len(self.st_gcn_networks)
|
| 331 |
+
|
| 332 |
+
def forward(self, x):
|
| 333 |
+
# data normalization
|
| 334 |
+
N, C, T, V, M = x.size()
|
| 335 |
+
x = x.permute(0, 4, 3, 1, 2).contiguous()
|
| 336 |
+
x = x.view(N * M, V * C, T)
|
| 337 |
+
x = self.data_bn(x)
|
| 338 |
+
x = x.view(N, M, V, C, T)
|
| 339 |
+
x = x.permute(0, 1, 3, 4, 2).contiguous()
|
| 340 |
+
x = x.view(N * M, C, T, V)
|
| 341 |
+
|
| 342 |
+
# forward
|
| 343 |
+
for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
|
| 344 |
+
x, _ = gcn(x, self.A * importance)
|
| 345 |
+
|
| 346 |
+
# global pooling
|
| 347 |
+
x = F.avg_pool2d(x, x.size()[2:]) # (b, 512, t, joint)
|
| 348 |
+
x = x.view(N, M, -1, 1, 1).mean(dim=1)
|
| 349 |
+
|
| 350 |
+
return x
|
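Note: ST_GCN_18 here appears to serve as a feature encoder over the 22-joint skeleton graph rather than a classifier. A minimal shape-check sketch (illustrative only, not repository code; the batch size, channel count, and sequence length below are assumptions):

# Hypothetical usage sketch for ST_GCN_18 (not part of the repository).
import torch
from model import ST_GCN_18

model = ST_GCN_18(in_channels=3)   # e.g. 3 accelerometer channels per joint
x = torch.randn(8, 3, 200, 22, 1)  # (N, C, T, V=22 joints, M=1 instance)
features = model(x)
print(features.shape)              # torch.Size([8, 512, 1, 1]) pooled features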
pos2bvh.py
ADDED
@@ -0,0 +1,41 @@
import numpy as np
from Quaternions import Quaternions
from scipy_motion import myBVH
import BVH
from scipy_motion import myAnimation
import Animation
from scipy_motion import myInverseKinematics as myIK
import InverseKinematics as IK
from tqdm import tqdm
import multiprocessing
import os
import os.path as osp
from scipy.spatial.transform import Rotation as R


parents = [-1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 16, 17, 18, 19]
# names = ['root','leftleg1','leftleg2','leftleg3','leftleg4','rightleg1','rightleg2','rightleg3','rightleg4',
#          'spline1','spline2','spline3','spline4','spline5','rightarm1','rightarm2','rightarm3','rightarm4',
#          'leftarm1','leftarm2','leftarm3','leftarm4']

def process_file(f):

    fk_positions = np.load('/path/to/joint/pos/%s.npy' % (f))

    frametime = 1 / 20

    anim_ik, _, _, save_file = IK.animation_from_positions(fk_positions, parents=parents)

    if save_file:
        BVH.save('bvh/%s.bvh' % f, anim_ik, frametime=frametime)

source_dir = '/path/to/joint/pos'
error_file = ['M005836.npy', 'M000990.npy', '000990.npy', '005836.npy']
npy_files = [file[:-4] for file in os.listdir(source_dir) if file.endswith('.npy') and file not in error_file]

# Process files in parallel
pool = multiprocessing.Pool(processes=8)
for _ in tqdm(pool.imap_unordered(process_file, npy_files), total=len(npy_files)):
    pass
pool.close()
pool.join()
pretrain.py
ADDED
@@ -0,0 +1,111 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import argparse
import os
import clip
import wandb
import datetime
import torch.optim as optim

from data import CLIPDataset
from utils import augment_data
from contrastive import ContrastiveModule

def main(args):

    train_dataset = CLIPDataset(args)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=8, pin_memory=True)

    date = datetime.datetime.now().strftime("%d-%m-%y_%H:%M")
    wandb.init(
        project='UniMTS',
        name=f"{args.run_tag}_{args.stage}_" + f"{date}"
    )

    model = ContrastiveModule(args).cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    save_path = './checkpoint/%s/' % args.run_tag
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    for epoch in range(args.num_epochs):

        tol_loss = 0

        model.train()
        for i, batch in enumerate(train_loader):

            inputs_imu = batch['imu'].float().cuda()
            inputs_text = clip.tokenize(batch['text'], truncate=True).cuda()
            mask = batch['mask'].float().cuda()

            input = inputs_imu * mask

            # rotation-invariant augmentation
            if args.aug:
                input = augment_data(input)

            # keep only the accelerometer channels if the gyroscope is not used
            if not args.gyro:
                b, t, c = input.shape
                indices = np.array([range(i, i+3) for i in range(0, c, 6)]).flatten()
                input = input[:,:,indices]

            b, t, c = input.shape
            if args.stft:
                input_stft = input.permute(0,2,1).reshape(b * c, t)
                input_stft = torch.abs(torch.stft(input_stft, n_fft=25, hop_length=28, onesided=False, center=True, return_complex=True))
                input_stft = input_stft.reshape(b, c, input_stft.shape[-2], input_stft.shape[-1]).reshape(b, c, t).permute(0,2,1)
                input = torch.cat((input, input_stft), dim=-1)

            input = input.reshape(b, t, 22, -1).permute(0, 3, 1, 2).unsqueeze(-1)

            # IMU and text representations
            logits_per_imu, logits_per_text = model(input, inputs_text)

            # positive keys are the entries on the diagonal
            labels = torch.arange(len(batch['imu'])).cuda()

            loss = F.cross_entropy(logits_per_imu / args.temperature, labels, reduction="mean")

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            tol_loss += len(inputs_imu) * loss.item()

            # print(epoch, i, loss.item())

        print(f'Epoch [{epoch+1}/{args.num_epochs}], Loss: {tol_loss / len(train_dataset):.4f}')
        wandb.log({'loss': tol_loss / len(train_dataset)})

        if epoch > 0 and epoch % args.log == 0:
            torch.save(model.model.state_dict(), os.path.join(save_path, f'epoch_{epoch}.pth'))

if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Unified Pre-trained Motion Time Series Model')

    # data
    parser.add_argument('--padding_size', type=int, default=200, help='padding size (default: 200)')
    parser.add_argument('--sample', type=float, default=1.0, help='pre-training down-sample ratio (default: 1)')
    parser.add_argument('--data_path', type=str, default='./data/', help='/path/to/data/')

    # training
    parser.add_argument('--run_tag', type=str, default='exp0', help='logging tag')
    parser.add_argument('--stage', type=str, default='pretrain', help='training stage')
    parser.add_argument('--num_epochs', type=int, default=100, help='number of pre-training epochs')
    parser.add_argument('--gyro', type=int, default=0, help='whether to use gyroscope channels (0 or 1)')
    parser.add_argument('--stft', type=int, default=0, help='whether to append STFT features (0 or 1)')
    parser.add_argument('--aug', type=int, default=1, help='whether to apply rotation augmentation (0 or 1)')
    parser.add_argument('--batch_size', type=int, default=64, help='batch size')
    parser.add_argument('--temperature', type=float, default=0.1, help='temperature')
    parser.add_argument('--log', type=int, default=10, help='checkpoint saving interval (epochs)')

    args = parser.parse_args()

    main(args)
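Side note: the `indices` selector above keeps the first three of every six per-joint channels, i.e. the accelerometer triplet of each (accelerometer, gyroscope) pair. A quick illustrative check (not part of the repository):

# Illustrative only: with 2 joints (12 channels ordered acc x/y/z then gyro x/y/z),
# the selector keeps channels 0-2 and 6-8, the accelerometer triplets.
import numpy as np

c = 12
indices = np.array([range(i, i + 3) for i in range(0, c, 6)]).flatten()
print(indices)  # [0 1 2 6 7 8]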
run_evaluation.sh
ADDED
@@ -0,0 +1,4 @@
python evaluate.py \
--batch_size 64 \
--checkpoint './checkpoint/UniMTS.pth' \
--data_path 'UniMTS_data'
run_evaluation_custom.sh
ADDED
@@ -0,0 +1,8 @@
python evaluate_custom.py \
--batch_size 64 \
--checkpoint './checkpoint/UniMTS.pth' \
--X_path 'UniMTS_data/TNDA-HAR/X_test.npy' \
--y_path 'UniMTS_data/TNDA-HAR/y_test.npy' \
--config_path 'UniMTS_data/TNDA-HAR/TNDA-HAR.json' \
--joint_list 20 2 21 3 11 \
--original_sampling_rate 50
run_finetune.sh
ADDED
@@ -0,0 +1,19 @@
for k in 1 2 3 5 10
do

python finetune.py \
--mode full \
--k $k \
--batch_size 64 \
--num_epochs 200 \
--checkpoint './checkpoint/UniMTS.pth' \
--data_path 'UniMTS_data'

done

python finetune.py \
--mode full \
--batch_size 64 \
--num_epochs 200 \
--checkpoint './checkpoint/UniMTS.pth' \
--data_path 'UniMTS_data'
run_finetune_custom.sh
ADDED
@@ -0,0 +1,33 @@
for k in 1 2 3 5 10
do

python finetune_custom.py \
--mode full \
--k $k \
--batch_size 64 \
--num_epochs 200 \
--checkpoint './checkpoint/UniMTS.pth' \
--X_train_path 'UniMTS_data/TNDA-HAR/X_train.npy' \
--y_train_path 'UniMTS_data/TNDA-HAR/y_train.npy' \
--X_test_path 'UniMTS_data/TNDA-HAR/X_test.npy' \
--y_test_path 'UniMTS_data/TNDA-HAR/y_test.npy' \
--config_path 'UniMTS_data/TNDA-HAR/TNDA-HAR.json' \
--joint_list 20 2 21 3 11 \
--original_sampling_rate 50 \
--num_class 8

done

python finetune_custom.py \
--mode full \
--batch_size 64 \
--num_epochs 200 \
--checkpoint './checkpoint/UniMTS.pth' \
--X_train_path 'UniMTS_data/TNDA-HAR/X_train.npy' \
--y_train_path 'UniMTS_data/TNDA-HAR/y_train.npy' \
--X_test_path 'UniMTS_data/TNDA-HAR/X_test.npy' \
--y_test_path 'UniMTS_data/TNDA-HAR/y_test.npy' \
--config_path 'UniMTS_data/TNDA-HAR/TNDA-HAR.json' \
--joint_list 20 2 21 3 11 \
--original_sampling_rate 50 \
--num_class 8
run_pretrain.sh
ADDED
@@ -0,0 +1,4 @@
python pretrain.py \
--aug 1 \
--batch_size 64 \
--data_path 'UniMTS_data'
text_aug.py
ADDED
@@ -0,0 +1,66 @@
import openai
import glob
import os
import shutil
from tqdm import tqdm

def load_api_key(file_path='api_key.txt'):
    with open(file_path, 'r') as f:
        for line in f:
            if line.startswith('api_key='):
                return line.strip().split('=', 1)[1]
    return None

openai.api_key = load_api_key()

if openai.api_key is None:
    print("Error: API key not found.")
    exit()

files = glob.glob('/path/to/txt')
aug_dir = '/path/to/output'

for f in tqdm(files):

    file_id = f.split('/')[-1]
    if not os.path.exists(aug_dir + file_id):

        with open(f, 'r') as file:
            lines = file.readlines()

        text = []
        for i, l in enumerate(lines):
            text.append(str(i) + ': ')
            text.append((l).split('#')[0].strip())
            if text[-1][-1] != '.':
                text.append('. ')
            else:
                text.append(' ')
        text = ''.join(text)

        prompt = 'The following one or multiple descriptions are describing the same human activities: '
        prompt += text
        prompt += 'Generate 3 paraphrases to describe the same activities. One in a line in a plain text format ending with \n, without numbering or - at the beginning. Do not generate any other analysis except from the paraphrased descriptions.'

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        pred = response.choices[0]['message']['content']
        # res = pred.split('\n')

        shutil.copy(f, aug_dir)
        with open(aug_dir + file_id, 'a') as log_file:
            log_file.write(pred)

files = glob.glob('/path/to/output')
for f in tqdm(files):
    with open(f, 'r') as file:
        lines = file.readlines()

    lines = [line.lstrip("- ") for line in lines if line.strip()]

    with open(f, 'w') as file:
        file.writelines(lines)
utils.py
ADDED
@@ -0,0 +1,215 @@
import matplotlib.pyplot as plt
import numpy as np
import torch
import imageio
import io

def random_rotation_matrix():
    # Random unit quaternion
    q = torch.randn(4)
    q = q / torch.norm(q)

    # Quaternion to rotation matrix
    R = torch.tensor([
        [1 - 2*q[2]**2 - 2*q[3]**2, 2*q[1]*q[2] - 2*q[3]*q[0], 2*q[1]*q[3] + 2*q[2]*q[0]],
        [2*q[1]*q[2] + 2*q[3]*q[0], 1 - 2*q[1]**2 - 2*q[3]**2, 2*q[2]*q[3] - 2*q[1]*q[0]],
        [2*q[1]*q[3] - 2*q[2]*q[0], 2*q[2]*q[3] + 2*q[1]*q[0], 1 - 2*q[1]**2 - 2*q[2]**2]
    ])
    return R

def augment_data(data):
    # data: (B, T, M) on GPU; each joint's (acc, gyro) pair shares one random rotation
    B, T, M = data.shape
    augmented_data = torch.zeros_like(data)

    for i in range(B):
        for c in range(0, M, 6):
            R = random_rotation_matrix().cuda()
            acc = data[i, :, c:c+3].transpose(0, 1)     # Shape (3, T)
            gyro = data[i, :, c+3:c+6].transpose(0, 1)  # Shape (3, T)

            # Apply rotation
            rotated_acc = torch.matmul(R, acc)
            rotated_gyro = torch.matmul(R, gyro)

            # Concatenate and assign to augmented_data
            augmented_data[i, :, c:c+3] = rotated_acc.transpose(0, 1)
            augmented_data[i, :, c+3:c+6] = rotated_gyro.transpose(0, 1)

    return augmented_data

def update_limits(data):
    # Get global min and max for each axis
    min_x, max_x = np.min(data[:, :, 0]), np.max(data[:, :, 0])
    min_y, max_y = np.min(data[:, :, 2]), np.max(data[:, :, 2])
    min_z, max_z = np.min(data[:, :, 1]), np.max(data[:, :, 1])

    # Add some padding to ensure the skeleton doesn't touch the plot edges
    padding = 0.1
    x_range = max_x - min_x
    y_range = max_y - min_y
    z_range = max_z - min_z

    return (min_x - padding * x_range, max_x + padding * x_range), \
           (min_y - padding * y_range, max_y + padding * y_range), \
           (min_z - padding * z_range, max_z + padding * z_range)

def plot_skeleton(frame_data, xlims, ylims, zlims, dataset):
    """
    Plot a single frame of skeleton data.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(frame_data[:, 0], frame_data[:, 2], frame_data[:, 1])

    # Connect the joints according to the skeleton structure of each dataset
    if dataset == 't2m':
        connections = [
            [0, 2, 5, 8, 11],
            [0, 1, 4, 7, 10],
            [0, 3, 6, 9, 12, 15],
            [9, 14, 17, 19, 21],
            [9, 13, 16, 18, 20]
        ]

    if dataset == 'kit':
        connections = [
            [0, 11, 12, 13, 14, 15],
            [0, 16, 17, 18, 19, 20],
            [0, 1, 2, 3, 4],
            [3, 5, 6, 7],
            [3, 8, 9, 10]
        ]

    if dataset == 'ntu':
        connections = [
            [0, 12, 13, 14, 15],
            [0, 16, 17, 18, 19],
            [0, 1, 20, 2, 3],
            [20, 4, 5, 6, 7, 21],
            [7, 22],
            [20, 8, 9, 10, 11, 23],
            [11, 24],
        ]

    # Plot the lines for each sequence
    for connection in connections:
        for i in range(len(connection)-1):
            start_joint = connection[i]
            end_joint = connection[i+1]
            ax.plot([frame_data[start_joint, 0], frame_data[end_joint, 0]],
                    [frame_data[start_joint, 2], frame_data[end_joint, 2]],
                    [frame_data[start_joint, 1], frame_data[end_joint, 1]])

    ax.view_init(elev=10, azim=90)
    ax.set_box_aspect((np.ptp(xlims), np.ptp(ylims), np.ptp(zlims)))

    ax.set_xlim(xlims)
    ax.set_ylim(ylims)
    ax.set_zlim(zlims)
    ax.set_xlabel('X')
    ax.set_ylabel('Z')
    ax.set_zlabel('Y')

    # Save the plot to a buffer
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    img = imageio.imread(buf)
    buf.close()

    plt.close(fig)  # Close the figure to prevent display
    return img

def plot_skeleton_gif(data, dataset):
    xlims, ylims, zlims = update_limits(data)
    images = [plot_skeleton(frame, xlims, ylims, zlims, dataset) for frame in data]
    imageio.mimsave('./skeleton_animation.gif', images, fps=20)
    return

def plot_single_skeleton(data, dataset, frame=0):

    xlims, ylims, zlims = update_limits(data)
    frame_data = data[frame]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(frame_data[:, 0], frame_data[:, 2], frame_data[:, 1])

    # Connect the joints according to the skeleton structure of each dataset
    if dataset == 't2m':
        connections = [
            [0, 2, 5, 8, 11],
            [0, 1, 4, 7, 10],
            [0, 3, 6, 9, 12, 15],
            [9, 14, 17, 19, 21],
            [9, 13, 16, 18, 20]
        ]

    if dataset == 'kit':
        connections = [
            [0, 11, 12, 13, 14, 15],
            [0, 16, 17, 18, 19, 20],
            [0, 1, 2, 3, 4],
            [3, 5, 6, 7],
            [3, 8, 9, 10]
        ]

    if dataset == 'ntu':
        connections = [
            [0, 12, 13, 14, 15],
            [0, 16, 17, 18, 19],
            [0, 1, 20, 2, 3],
            [20, 4, 5, 6, 7, 21],
            [7, 22],
            [20, 8, 9, 10, 11, 23],
            [11, 24],
        ]

    # Plot the lines for each sequence
    for connection in connections:
        for i in range(len(connection)-1):
            start_joint = connection[i]
            end_joint = connection[i+1]
            ax.plot([frame_data[start_joint, 0], frame_data[end_joint, 0]],
                    [frame_data[start_joint, 2], frame_data[end_joint, 2]],
                    [frame_data[start_joint, 1], frame_data[end_joint, 1]])

    #ax.view_init(elev=10, azim=90)
    ax.set_box_aspect((np.ptp(xlims), np.ptp(ylims), np.ptp(zlims)))

    ax.set_xlim(xlims)
    ax.set_ylim(ylims)
    ax.set_zlim(zlims)

    ax.set_xlabel('X')
    ax.set_ylabel('Z')
    ax.set_zlabel('Y')

    plt.savefig('skeleton.pdf', bbox_inches='tight')

def compute_height(joints, head_index, l_foot_index, r_foot_index):
    joints = torch.from_numpy(joints)
    left = (joints[:, head_index, 1] - joints[:, l_foot_index, 1])[0]
    right = (joints[:, head_index, 1] - joints[:, r_foot_index, 1])[0]
    height = (left + right) / 2
    return height

def compute_metrics_np(similarity_matrix, correct_labels):

    B, _ = similarity_matrix.shape

    ranked_indices = np.argsort(-similarity_matrix, axis=1)

    correct_label_ranks = np.array([np.where(ranked_indices[i] == correct_labels[i])[0][0] for i in range(B)]) + 1

    # Compute R@K
    R_at_1 = np.mean(correct_label_ranks <= 1)
    R_at_2 = np.mean(correct_label_ranks <= 2)
    R_at_3 = np.mean(correct_label_ranks <= 3)
    R_at_4 = np.mean(correct_label_ranks <= 4)
    R_at_5 = np.mean(correct_label_ranks <= 5)

    # Compute MRR
    MRR = np.mean(1.0 / correct_label_ranks)

    return R_at_1, R_at_2, R_at_3, R_at_4, R_at_5, MRR
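For reference, a minimal sketch of what compute_metrics_np returns on a toy retrieval problem (illustrative numbers, not from the repository):

# Illustrative only: 3 queries x 4 candidate labels, ground truth 0, 2, 1.
import numpy as np
from utils import compute_metrics_np

sim = np.array([
    [0.9, 0.1, 0.3, 0.2],  # correct label 0 is ranked 1st
    [0.2, 0.8, 0.7, 0.1],  # correct label 2 is ranked 2nd
    [0.6, 0.1, 0.5, 0.3],  # correct label 1 is ranked 4th
])
labels = np.array([0, 2, 1])

R1, R2, R3, R4, R5, MRR = compute_metrics_np(sim, labels)
print(R1, R3, MRR)  # 0.333..., 0.666..., (1 + 1/2 + 1/4) / 3 = 0.5833...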