| Crates.io | taco_format |
| lib.rs | taco_format |
| version | 0.1.3 |
| created_at | 2025-11-24 18:42:12.591+00 |
| updated_at | 2026-01-12 08:25:10.056913+00 |
| description | TACO (Trajectory and Compressed Observables) Format for molecular dynamics data |
| homepage | |
| repository | https://github.com/username/taco_format |
| max_upload_size | |
| id | 1948383 |
| size | 2,238,277 |
TACO is a high-performance binary format for molecular dynamics (MD) trajectory data, designed for efficient storage and processing of large simulation trajectories.
TACO provides significant space savings compared to traditional formats:
TACO provides a Python interface for easy integration with analysis tools.
pip install taco-format
import taco_format
import numpy as np
from ase import Atoms
# Create some example Atoms objects
atoms_list = [Atoms('H2O') for _ in range(100)]
# Writing
taco_format.write('traj.taco', atoms_list)
# Reading
atoms_list = taco_format.read('traj.taco')
TACO supports efficiently appending frames to existing trajectory files without rewriting the entire file:
import taco_format
from ase import Atoms
# Create initial trajectory
initial_frames = [Atoms('H2O') for _ in range(100)]
taco_format.write('traj.taco', initial_frames)
# Later, append more frames to the same file
additional_frames = [Atoms('H2O') for _ in range(50)]
taco_format.append('traj.taco', additional_frames)
# The file now contains 150 frames total
all_frames = taco_format.read('traj.taco')
print(len(all_frames)) # Output: 150
Benefits of Append:
Rust API:
use taco_format::{Writer, Frame, FrameData};
use ndarray::Array2;
// Create initial trajectory
let mut writer = Writer::create(
"trajectory.taco",
num_atoms,
time_step,
sim_metadata,
atom_metadata,
compression_settings,
)?;
// Write initial frames
for i in 0..100 {
let frame = create_frame(i); // Your frame creation logic
writer.write_frame(frame)?;
}
writer.finish()?;
// Later, append more frames
let mut writer = Writer::append("trajectory.taco")?;
for i in 100..150 {
let frame = create_frame(i);
writer.write_frame(frame)?;
}
writer.finish()?;
# Write with custom settings
taco_format.write('traj.taco', atoms_list,
time_step=0.002, # in picoseconds
full_frame_interval=50, # store full frame every 50 frames
compression_level=5, # zstd compression level (1-22)
lossless=False) # use lossy compression
# Read specific frames
frames = taco_format.read('traj.taco',
frame_indices=[0, 10, 20, 30, 40])
# Read a range of frames
frames = taco_format.read('traj.taco',
start_frame=100,
end_frame=200) # Reads frames 100-199
# Efficient writing for large trajectories
taco_format.write('big_traj.taco', big_atoms_list,
compression_level=3) # Use moderate compression
TACO provides built-in tensor operations for common trajectory analyses:
import taco_format
import numpy as np
# Calculate center of mass
positions = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype=np.float32)
masses = np.array([[1.0], [12.0], [16.0]], dtype=np.float32)
com = taco_format.center_of_mass(positions, masses)
# Extract subset of atoms
indices = [0, 2, 4] # Atoms to extract
subset = taco_format.extract_subset(positions, indices)
# Calculate RMSD between two coordinate sets
coords1 = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=np.float32)
coords2 = np.array([[0.1, 0.1, 0.1], [1.1, 0.1, 0.1]], dtype=np.float32)
rmsd = taco_format.calc_rmsd(coords1, coords2)
The Python interface also includes utility functions for working with TACO files:
# Check if file is a TACO file
is_taco = taco_format.is_taco_file("trajectory.taco")
# Get file information
info = taco_format.get_file_info("trajectory.taco")
print(info)
# Copy frames from one file to another
taco_format.copy_frames("source.taco", "subset.taco",
start_frame=10, num_frames=50)
# Extract specific atoms
atom_indices = [0, 1, 2, 10, 15, 20] # Atoms to extract
taco_format.extract_atoms("full.taco", "subset.taco", atom_indices)
TACO provides native interfaces for C, C++, and Fortran, enabling integration with existing molecular dynamics codes and high-performance computing applications.
All interfaces are located in the c_api/ directory with a single, canonical implementation.
The C API provides a low-level interface suitable for integration with C programs and as a foundation for other language bindings.
#include "taco_format_c.h"
// Setup metadata
const char* names[] = {"O", "H", "H"};
const char* types[] = {"O", "H", "H"};
float masses[] = {15.999, 1.008, 1.008};
taco_atom_metadata_t atom_metadata = {
.masses = masses, .names = names, .types = types, .num_atoms = 3
};
taco_simulation_metadata_t sim_metadata = {
.name = NULL, .description = NULL, .ensemble = NULL,
.temperature = 300.0, .pressure = 1.0, .software = NULL, .timestep_fs = 1.0
};
taco_compression_settings_t compression = {.precision = 0, .zstd_level = 3};
// Create writer
CTacoWriter* writer = taco_writer_create("output.taco", 3, 0.001,
&sim_metadata, &atom_metadata,
compression);
// Write frame
float positions[] = {0.0, 0.0, 0.0, 0.1, 0.08, 0.0, 0.1, -0.08, 0.0};
taco_frame_t frame = {
.frame_number = 0, .time = 0.0, .positions = positions,
.num_atoms = 3, /* other fields... */
};
taco_writer_write_frame(writer, &frame);
taco_writer_finish(writer);
The C++ API provides a modern interface with RAII, STL containers, and exception handling.
#include "taco_format.hpp"
// Setup metadata
std::vector<float> masses = {15.999f, 1.008f, 1.008f};
std::vector<std::string> names = {"O", "H", "H"};
taco::AtomMetadata atom_metadata(masses, names);
taco::SimulationMetadata sim_metadata;
sim_metadata.ensemble = "NVT";
sim_metadata.temperature = 300.0;
// Create writer (RAII - automatically closes)
taco::Writer writer("output.taco", 3, 0.001, sim_metadata, atom_metadata);
// Create and write frame
taco::Frame frame;
frame.positions = {{0.0f, 0.0f, 0.0f}, {0.1f, 0.08f, 0.0f}, {0.1f, -0.08f, 0.0f}};
writer.write_frame(frame);
// Read all frames
taco::Reader reader("output.taco");
auto all_frames = reader.read_all_frames();
The Fortran API provides a modern Fortran 2008 interface with ISO C binding.
program taco_example
use iso_c_binding
use iso_fortran_env, only: real32, real64, int64
use taco_format
implicit none
type(c_ptr) :: writer, reader
type(taco_frame_t) :: frame
type(taco_compression_settings_t) :: compression
type(taco_atom_metadata_t) :: atom_meta
type(taco_simulation_metadata_t) :: sim_meta
real(real32), target :: masses(3) = [15.999, 1.008, 1.008]
real(real32), target :: positions(9) = [0.0, 0.0, 0.0, 0.1, 0.08, 0.0, 0.1, -0.08, 0.0]
character(len=8), target :: names(3) = ['O ', 'H ', 'H ']
character(len=8), target :: types(3) = ['O ', 'H ', 'H ']
type(c_ptr), target :: name_ptrs(3), type_ptrs(3)
integer :: error_code, i
! Setup pointers for strings
do i = 1, 3
name_ptrs(i) = c_loc(names(i))
type_ptrs(i) = c_loc(types(i))
end do
! Setup metadata
sim_meta%temperature = 300.0_c_double
sim_meta%pressure = 1.0_c_double
sim_meta%timestep_fs = 1.0_c_double
! Set other fields to null
atom_meta%masses = c_loc(masses)
atom_meta%names = c_loc(name_ptrs)
atom_meta%types = c_loc(type_ptrs)
atom_meta%num_atoms = 3
compression%precision = 0 ! lossless
compression%zstd_level = 3
! Create writer
writer = taco_writer_create('output.taco', 3, 0.001_real64, &
sim_meta, atom_meta, compression)
! Setup frame
frame%frame_number = 0
frame%time = 0.0_c_double
frame%positions = c_loc(positions)
frame%num_atoms = 3
! Set other fields...
! Write frame
error_code = taco_writer_write_frame(writer, frame)
error_code = taco_writer_finish(writer)
end program
# Build the C API library
cd c_api
cargo build --release
# Build and test C examples
make test_c_api_static
./test_c_api_static
# Build and run Fortran examples and tests
cd fortran
make all # Build interface, examples, and tests
make run-examples # Run all examples
make run-tests # Run all tests
The C API is located in c_api/ with:
- taco_format_c.h - C header file
- src/lib.rs - Rust implementation with C FFI
- test_c_api.c - C example/test
The Fortran API is located in c_api/fortran/ with:
- taco_format.f90 - Fortran interface module
- examples/ - Comprehensive Fortran examples
- tests/ - Fortran unit and integration tests
See C/C++/Fortran API Documentation for complete details.
use taco_format::{Writer, Frame, FrameData, SimulationMetadata, AtomMetadata, CompressionSettings};
use ndarray::Array2;
// Create metadata
let sim_metadata = SimulationMetadata::default();
let atom_metadata = AtomMetadata::default();
// Create a writer
let mut writer = Writer::create(
"trajectory.taco", // File path
1000, // Number of atoms
0.001, // Time step (ps)
sim_metadata,
atom_metadata,
CompressionSettings::default(),
)?;
// Write frames
let positions = Array2::<f32>::zeros((1000, 3));
let frame_data = FrameData::new(positions);
let frame = Frame::new(0, 0.0, frame_data);
writer.write_frame(frame)?;
// Write multiple frames sequentially
let frames = vec![frame1, frame2, frame3, ...];
writer.write_frames(frames)?;
// Finish writing
writer.finish()?;
use taco_format::Reader;
// Open a reader
let mut reader = Reader::open("trajectory.taco")?;
// Get header information
println!("Num atoms: {}", reader.num_atoms());
println!("Num frames: {}", reader.num_frames());
// Read a specific frame
let frame = reader.read_frame(42)?;
let positions = frame.data.positions.unwrap();
// Read a range of frames
let frames = reader.read_frame_range(100, 200)?; // Frames 100-199
// Read specific frames
let frame_indices = vec![10, 20, 30, 40, 50];
let selected_frames = reader.read_frames(&frame_indices)?;
// Iterate through all frames
for frame_result in reader.iter_frames() {
let frame = frame_result?;
// Process frame...
}
use taco_format::tensor;
use ndarray::{Array1, Array2};
// Calculate center of mass
let positions = Array2::<f32>::zeros((100, 3));
let masses = Array1::<f32>::ones(100);
let com = tensor::center_of_mass(&positions, &Some(masses));
// Extract subset of atoms
let atom_indices = vec![0, 1, 5, 10];
let subset = tensor::extract_subset(&positions, &atom_indices);
// Calculate RMSD between two coordinate sets
let coords1 = Array2::<f32>::zeros((100, 3));
let coords2 = Array2::<f32>::zeros((100, 3));
let rmsd = tensor::calc_rmsd(&coords1, &coords2)?;
[Header]
- Format version
- Simulation parameters (time step, temperature, etc.)
- Atom metadata (masses, names, etc.)
- Compression settings
[Frame Index Table]
- Byte offsets to each frame for random access
[Data Blocks]
- Full and delta frames:
- Position tensors (Nx3)
- Velocity tensors (Nx3)
- Force tensors (Nx3)
- Box dimensions & energies
git clone https://github.com/username/taco-format.git
cd taco-format
cargo build --release
For Python bindings:
pip install maturin
maturin develop --release
MIT