In [1]:
import pandas as pd
import numpy as np
import sqlite3
import os
import pyzstd

In [13]:
# Configuration of the synthetic acquisition. The folder created here holds
# the SQLite metadata file (analysis.tdf) and the binary peak file
# (analysis.tdf_bin) that alphatims.bruker.TimsTOF opens in a later cell.
folder_name = "test.d"
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the creation, and the cell stays safely re-runnable.
os.makedirs(folder_name, exist_ok=True)
tdf_file_name = os.path.join(folder_name, "analysis.tdf")
tdf_bin_file_name = os.path.join(folder_name, "analysis.tdf_bin")

# Acquisition geometry.
num_cycles = 2
frames_per_cycle = 2
num_frames = num_cycles * frames_per_cycle
num_scans = 4
scanmode = 8

# Acquisition ranges written to the global metadata table.
mz_min = 100.000000
mz_max = 1000.000000
im_min = 0.5
im_max = 1.5

# The generator cell gives the k-th scan (counted across all frames) exactly
# k peaks, so the total number of peaks is the triangular number
# 1 + 2 + ... + total_scans.
total_scans = num_frames * num_scans
num_tof = (total_scans + 1) * total_scans // 2

In [14]:
# Frame types alternate: 0 for even-indexed frames, `scanmode` for odd ones.
msms_type = [scanmode if i % 2 else 0 for i in range(num_frames)]

# Generate the raw peaks. `count` and `offset` run across ALL frames and are
# never reset: every scan holds one peak more than the previous scan, and its
# tof values continue right after the last tof of the previous scan.
# Intensities are simply twice the tof value.
frame_data = []
count = 0
offset = 0
for frame in range(num_frames):
    frame_counts, frame_tofs, frame_ints = [], [], []
    for scan in range(num_scans):
        count += 1
        scan_tofs = np.arange(offset + 1, offset + 1 + count)
        frame_counts.append(count)
        frame_tofs.append(scan_tofs)
        frame_ints.append(scan_tofs * 2)
        offset += count
    # One tuple per frame: (peaks per scan, tofs per scan, intensities per scan).
    frame_data.append((frame_counts, frame_tofs, frame_ints))

In [15]:
# Serialise every generated frame into one binary record and collect the
# per-frame statistics that later go into the Frames table.
#
# Layout of the uncompressed uint32 buffer of a frame, as written below:
#   word 0                    : number of scans
#   words 1 .. scan_count - 1 : 2 * peak count of every scan except the last
#   words scan_count ..       : interleaved (tof, intensity) pairs; the first
#                               tof of a scan is stored as is, the following
#                               ones as the difference to their predecessor
# The buffer bytes are then regrouped so that byte 0 of every word comes
# first, then byte 1 of every word, and so on, and finally zstd-compressed.
#
# Layout of a record in the output file:
#   uint32 record size (compressed length + 8), uint32 scan count, payload.
data = []
frame_offsets = []
summed_intensities = []
max_intensities = []
num_peaks = []
frame_offset = 0
for frame in frame_data:
    scans, tofs_per_scan, ints_per_scan = frame
    # Byte position at which this record starts (stored as TimsId later on).
    frame_offsets.append(frame_offset)
    scan_count = len(scans)
    ints = np.concatenate(ints_per_scan)
    summed_intensities.append(np.sum(ints))
    max_intensities.append(np.max(ints))
    num_peaks.append(len(ints))

    buffer = np.zeros(scan_count + len(ints) * 2, dtype=np.uint32)
    buffer[0] = scan_count
    buffer[1:scan_count] = np.array(scans[:-1]) * 2
    # Intensities occupy every second word after the header, starting at the
    # second word of the first pair.
    buffer[scan_count + 1::2] = ints
    # A dedicated cursor is used so the running `offset` of the generator
    # cell is not silently overwritten by this cell.
    cursor = scan_count
    for tofs in tofs_per_scan:
        buffer[cursor] = tofs[0]
        buffer[cursor + 2: cursor + 2 * len(tofs): 2] = np.diff(tofs)
        cursor += 2 * len(tofs)

    # View the words as bytes and transpose: one row per word becomes one row
    # per byte position, which is then flattened into a contiguous copy.
    buffer = buffer.view(np.uint8)
    buffer = buffer.reshape(-1, 4).T.flatten()
    decompressed_bytes = buffer
    compressed_data = pyzstd.compress(decompressed_bytes)
    compressed_data = np.frombuffer(compressed_data, dtype=np.uint8)

    # The 8 extra bytes account for the two uint32 header fields.
    frame_size = len(compressed_data) + 8
    header = np.array([frame_size, scan_count], dtype=np.uint32).view(np.uint8)
    data.append(header)
    data.append(compressed_data)
    frame_offset += frame_size
bin_data = np.concatenate(data)

In [16]:
# Persist the concatenated frame records, discarding any earlier copy of the
# binary file before the new one is written.
stale_bin_file_present = os.path.exists(tdf_bin_file_name)
if stale_bin_file_present:
    os.remove(tdf_bin_file_name)

with open(tdf_bin_file_name, mode="wb") as tdf_bin_file:
    tdf_bin_file.write(bin_data.tobytes())

In [17]:
# `size` is the number of rows of the Frames table; later cells reuse it.
size = num_frames
peaks = num_scans * (num_scans + 1) // 2

# One row per frame. The per-frame statistics and byte offsets come from the
# encoding cell. Further columns of the schema (MzCalibration, T1, T2,
# TimsCalibration, PropertyGroup, Pressure) are not populated here.
frame_ids = np.arange(1, size + 1)
frame_columns = {
    'Id': frame_ids,
    # Frame i is stamped with i / 10.
    'Time': frame_ids.astype(np.float64) / 10,
    'Polarity': ["+"] * size,
    'ScanMode': [scanmode] * size,
    'MsMsType': msms_type,
    # Byte offset of the frame's record inside the binary file.
    'TimsId': frame_offsets,
    'MaxIntensity': max_intensities,
    'SummedIntensities': summed_intensities,
    'NumScans': [num_scans] * size,
    'NumPeaks': num_peaks,
    'AccumulationTime': [100] * size,
    'RampTime': [100] * size,
}
frames = pd.DataFrame(frame_columns)
frames

Unnamed: 0,Id,Time,Polarity,ScanMode,MsMsType,TimsId,MaxIntensity,SummedIntensities,NumScans,NumPeaks,AccumulationTime,RampTime
0,1,0.1,+,8,0,0,20,110,4,10,100,100
1,2,0.2,+,8,8,48,72,1222,4,26,100,100
2,3,0.3,+,8,0,130,156,4830,4,42,100,100
3,4,0.4,+,8,8,235,272,12470,4,58,100,100


In [18]:
# Precursors table with one precursor fewer than there are frames.
# A dedicated count is used instead of temporarily decrementing the shared
# `size`: if building the table failed midway, `size` would otherwise stay
# decremented and every later cell would silently build shorter tables.
num_precursors = size - 1
precursors = pd.DataFrame(
    {
        'Id': np.arange(1, num_precursors + 1),
        'LargestPeakMz': 500.0 + np.arange(num_precursors),
        'AverageMz': 500.5 + np.arange(num_precursors),
        'MonoisotopicMz': 500.0 + np.arange(num_precursors),
        # Charge and scan number alternate between two values.
        'Charge': [2 if i % 2 == 0 else 3 for i in range(num_precursors)],
        'ScanNumber': [1 if i % 2 == 0 else 2 for i in range(num_precursors)],
        'Intensity': [10] * num_precursors,
        # Parent frame ids are the odd numbers, each used twice: 1, 1, 3, 3, ...
        'Parent': [(i // 2) * 2 + 1 for i in range(num_precursors)],
    }
)
precursors

Unnamed: 0,Id,LargestPeakMz,AverageMz,MonoisotopicMz,Charge,ScanNumber,Intensity,Parent
0,1,500.0,500.5,500.0,2,1,10,1
1,2,501.0,501.5,501.0,3,2,10,1
2,3,502.0,502.5,502.0,2,1,10,3


In [19]:
# PasefFrameMsMsInfo table: one isolation window per row, two rows per
# even-numbered frame (2, 2, 4, 4, ...).
fragment_frames = pd.DataFrame(
    {
        'Frame': [(i // 2 + 1) * 2 for i in range(size)],
        'ScanNumBegin': [2 if i % 2 == 0 else 1 for i in range(size)],
        'ScanNumEnd': [3 if i % 2 == 0 else 2 for i in range(size)],
        'IsolationMz': 500.5 + np.arange(size),
        'IsolationWidth': [2.0] * size,
        'CollisionEnergy': [0.0] * size,
        'Precursor': np.arange(1, size + 1),
    }
)
# The Precursors table has only size - 1 entries, so the last row generated
# above would point at a precursor id that does not exist. Overwrite it with
# a copy of the third-from-last row ...
fragment_frames.iloc[-1] = fragment_frames.iloc[-3]
# ... but place that copy in the same frame as the second-to-last row, so the
# copied precursor is fragmented in two different frames.
# The assignment goes through .loc rather than through `.values`: writing into
# the NumPy view only works by accident of the internal memory layout and the
# array is read-only once pandas Copy-on-Write is active.
last_row_label = fragment_frames.index[-1]
fragment_frames.loc[last_row_label, 'Frame'] = fragment_frames['Frame'].iloc[-2]
fragment_frames

Unnamed: 0,Frame,ScanNumBegin,ScanNumEnd,IsolationMz,IsolationWidth,CollisionEnergy,Precursor
0,2,2,3,500.5,2.0,0.0,1
1,2,1,2,501.5,2.0,0.0,2
2,4,2,3,502.5,2.0,0.0,3
3,4,1,2,501.5,2.0,0.0,2


In [20]:
# Entries of the GlobalMetaData key/value table. Only the keys below are
# written. Keys that are not populated by this generator:
#   SchemaType, SchemaVersionMajor, SchemaVersionMinor,
#   AcquisitionSoftwareVendor, InstrumentVendor, ClosedProperly, AnalysisId,
#   AcquisitionSoftwareVersion, AcquisitionFirmwareVersion,
#   AcquisitionDateTime, InstrumentName, InstrumentFamily, InstrumentRevision,
#   InstrumentSourceType, InstrumentSerialNumber, OperatorName, Description,
#   MethodName, DenoisingEnabled, PeakWidthEstimateValue,
#   PeakWidthEstimateType, PeakListIndexScaleFactor, DigitizerType,
#   DigitizerSerialNumber.
meta_data_entries = {
    "TimsCompressionType": 2,
    # Taken from the NumPeaks value of the last frame.
    "MaxNumPeaksPerScan": int(frames["NumPeaks"].iloc[-1]),
    "DigitizerNumSamples": num_tof,
    "MzAcqRangeLower": mz_min,
    "MzAcqRangeUpper": mz_max,
    "AcquisitionSoftware": "timsTOF",
    "SampleName": "test",
    "OneOverK0AcqRangeLower": im_min,
    "OneOverK0AcqRangeUpper": im_max,
}

# Two-column table: one row per metadata entry, in insertion order.
global_meta_data = pd.DataFrame(
    {
        "Key": list(meta_data_entries),
        "Value": list(meta_data_entries.values()),
    }
)
global_meta_data

Unnamed: 0,Key,Value
0,TimsCompressionType,2
1,MaxNumPeaksPerScan,58
2,DigitizerNumSamples,136
3,MzAcqRangeLower,100.0
4,MzAcqRangeUpper,1000.0
5,AcquisitionSoftware,timsTOF
6,SampleName,test
7,OneOverK0AcqRangeLower,0.5
8,OneOverK0AcqRangeUpper,1.5


In [21]:
# Write the four metadata tables into a fresh SQLite file.
if os.path.exists(tdf_file_name):
    os.remove(tdf_file_name)

# A sqlite3 connection used as a context manager only commits or rolls back
# the transaction; it does NOT close the connection. The connection is
# therefore closed explicitly, so the file handle is released before the
# folder is read back (and before a re-run tries to delete the file).
sql_database_connection = sqlite3.connect(tdf_file_name)
try:
    with sql_database_connection:
        for table_name, table in (
            ("GlobalMetaData", global_meta_data),
            ("Frames", frames),
            ("Precursors", precursors),
            ("PasefFrameMsMsInfo", fragment_frames),
        ):
            # index=False: the DataFrame index is not a column of the schema.
            table.to_sql(
                table_name,
                sql_database_connection,
                index=False
            )
finally:
    sql_database_connection.close()

In [11]:
# Read the generated folder back with alphatims to check the round trip.
import alphatims.bruker
# NOTE(review): clearing the DLL file name presumably makes alphatims skip the
# Bruker SDK and use its own reader/conversions — confirm against alphatims.
# It must be set before the TimsTOF object is constructed.
alphatims.bruker.BRUKER_DLL_FILE_NAME = ""
# Rebinds `data`, which the encoding cell used for its list of byte chunks.
data = alphatims.bruker.TimsTOF(folder_name)
# The displayed tof indices run from 0 to 135, i.e. one less than the
# generated tof values 1..136.
data.tof_indices

100%|█████████████████████████████████████████████| 4/4 [00:01<00:00,  2.83it/s]


array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135], dtype=uint32)

In [13]:
# NOTE(review): looks like a cumulative peak count per push (CSR-style index
# pointer); repeated values would then be pushes without peaks — confirm.
data.push_indptr

array([  0,   0,   0,   0,   0,   0,   1,   3,   6,  10,  10,  15,  21,
        28,  36,  36,  45,  55,  66,  78,  78,  91, 105, 120, 136, 136])

In [14]:
# Index the loaded data with 1; every displayed row has frame_indices == 1
# and the intensities are the generated values 2, 4, ..., 20.
data[1]

Unnamed: 0,raw_indices,frame_indices,scan_indices,precursor_indices,push_indices,tof_indices,rt_values,rt_values_min,mobility_values,quad_low_mz_values,quad_high_mz_values,mz_values,intensity_values,corrected_intensity_values
0,0,1,0,0,5,0,0.1,0.001667,1.5,-1.0,-1.0,100.0,2,2
1,1,1,1,0,6,1,0.1,0.001667,1.3,-1.0,-1.0,103.18152,4,4
2,2,1,1,0,6,2,0.1,0.001667,1.3,-1.0,-1.0,106.412861,6,6
3,3,1,2,0,7,3,0.1,0.001667,1.1,-1.0,-1.0,109.694023,8,8
4,4,1,2,0,7,4,0.1,0.001667,1.1,-1.0,-1.0,113.025006,10,10
5,5,1,2,0,7,5,0.1,0.001667,1.1,-1.0,-1.0,116.40581,12,12
6,6,1,3,0,8,6,0.1,0.001667,0.9,-1.0,-1.0,119.836435,14,14
7,7,1,3,0,8,7,0.1,0.001667,0.9,-1.0,-1.0,123.316881,16,16
8,8,1,3,0,8,8,0.1,0.001667,0.9,-1.0,-1.0,126.847147,18,18
9,9,1,3,0,8,9,0.1,0.001667,0.9,-1.0,-1.0,130.427235,20,20
