Skip to main content
The builder helpers module provides utility functions for common tasks when building molecular systems, including grouping atoms, assigning types, and setting charges from selections or tables.

Quick Start

import warp_md as wp
from warp_md.builder import (
    group_indices,
    charges_from_selections,
    charges_from_table
)

# Load system
sys = wp.System("protein.pdb")

# Group atoms by residue
backbone = sys.select("backbone")
groups = group_indices(sys, backbone, group_by="resid")
print(f"Found {len(groups)} residues")

# Assign charges from selections
charges = charges_from_selections(sys, [
    {"selection": "name CA", "charge": 0.5},
    {"selection": "name C", "charge": 0.3},
])

Functions

group_indices

Group atom indices from a selection by residue, chain, or both.
def group_indices(
    system: System,
    selection: Selection,
    group_by: str
) -> List[List[int]]:
    """Group selection indices by structural unit.
    
    Args:
        system: Molecular system
        selection: Atom selection to group
        group_by: Grouping mode ("resid", "chain", "resid_chain", or "chain_resid")
        
    Returns:
        List of index groups, sorted by grouping key
        
    Raises:
        ValueError: If group_by is invalid
    """
system
System
required
warp-md System object
selection
Selection
required
Selection to group
group_by
str
required
Grouping mode:
  • "resid" - Group by residue ID
  • "chain" - Group by chain ID
  • "resid_chain" or "chain_resid" - Group by (chain, resid) tuple
Example: Group by Residue
import warp_md as wp
from warp_md.builder import group_indices

sys = wp.System("protein.pdb")

# Group backbone atoms by residue
backbone = sys.select("backbone")
residue_groups = group_indices(sys, backbone, group_by="resid")

for i, group in enumerate(residue_groups):
    print(f"Residue {i+1}: {len(group)} atoms")
    
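# Get the first backbone atom of each residue (usually N in standard PDB
# atom order). To get CA atoms instead, group a "name CA" selection.
first_indices = [group[0] for group in residue_groups]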
Example: Group by Chain
# Group protein atoms by chain
protein = sys.select("protein")
chain_groups = group_indices(sys, protein, group_by="chain")

for chain_idx, group in enumerate(chain_groups):
    print(f"Chain {chain_idx}: {len(group)} atoms")
Example: Group by Chain and Residue
# Group all atoms by (chain, residue)
all_atoms = sys.select("all")
chain_residue_groups = group_indices(sys, all_atoms, group_by="resid_chain")

print(f"Total groups: {len(chain_residue_groups)}")

# Each group holds the atoms of one residue in one chain
atoms = sys.atom_table()
for group in chain_residue_groups[:5]:
    chain = atoms["chain_id"][group[0]]
    resid = atoms["resid"][group[0]]
    resname = atoms["resname"][group[0]]
    print(f"Chain {chain}, Residue {resid} ({resname}): {len(group)} atoms")

group_types_from_selections

Assign type indices to groups based on selection matching.
def group_types_from_selections(
    system: System,
    selection: Selection,
    group_by: str,
    type_selections: Sequence[str]
) -> List[int]:
    """Assign type indices to groups.
    
    Args:
        system: Molecular system
        selection: Selection to group and type
        group_by: Grouping mode ("resid", "chain", "resid_chain")
        type_selections: Selection strings for each type
        
    Returns:
        List of type indices (one per group)
        
    Raises:
        ValueError: If a group matches no type selection
    """
system
System
required
warp-md System object
selection
Selection
required
Selection to group and type
group_by
str
required
Grouping mode ("resid", "chain", "resid_chain")
type_selections
Sequence[str]
required
Selection strings for each type (type index = position in list)
Example: Classify Residues
import warp_md as wp
from warp_md.builder import group_types_from_selections

sys = wp.System("protein.pdb")

# Classify residues as charged, polar, or hydrophobic
protein = sys.select("protein")
type_selections = [
    "resname ARG LYS ASP GLU HIS",  # Type 0: charged
    "resname SER THR ASN GLN CYS",  # Type 1: polar
    "resname ALA VAL LEU ILE PHE TRP TYR MET PRO GLY",  # Type 2: hydrophobic
]

residue_types = group_types_from_selections(
    sys, protein, "resid", type_selections
)

print(f"Charged: {residue_types.count(0)}")
print(f"Polar: {residue_types.count(1)}")
print(f"Hydrophobic: {residue_types.count(2)}")
Example: Classify Chains
# Classify chains as protein, nucleic, or other
all_atoms = sys.select("all")
chain_types = group_types_from_selections(
    sys,
    all_atoms,
    group_by="chain",
    type_selections=[
        "protein",
        "nucleic",
        "not protein and not nucleic"
    ]
)

for i, type_idx in enumerate(chain_types):
    type_name = ["protein", "nucleic", "other"][type_idx]
    print(f"Chain {i}: {type_name}")

charges_from_selections

Assign atomic charges based on selection rules.
def charges_from_selections(
    system: System,
    selections: Iterable[Mapping[str, float]],
    default: float = 0.0
) -> List[float]:
    """Assign charges from selection/charge pairs.
    
    Args:
        system: Molecular system
        selections: Iterable of {"selection": str, "charge": float} dicts
        default: Default charge for unmatched atoms
        
    Returns:
        List of charges (one per atom)
        
    Raises:
        ValueError: If entry missing 'selection' or 'charge' key
    """
system
System
required
warp-md System object
selections
Iterable[Mapping[str, float]]
required
Selection/charge pairs. Each entry must have:
  • "selection" - Selection expression (str)
  • "charge" - Charge value (float)
default
float
default:"0.0"
Default charge for atoms not matching any selection
Example: Basic Charge Assignment
import warp_md as wp
from warp_md.builder import charges_from_selections

sys = wp.System("protein.pdb")

# Assign simple charges
charges = charges_from_selections(sys, [
    {"selection": "name CA", "charge": 0.5},
    {"selection": "name C", "charge": 0.3},
    {"selection": "name O", "charge": -0.5},
    {"selection": "name N", "charge": -0.3},
], default=0.0)

print(f"Assigned {len(charges)} charges")
print(f"Total charge: {sum(charges):.2f}")
Example: Residue-Based Charges
# Assign charges based on residue type
charge_rules = [
    # Charged residues
    {"selection": "resname ARG and name CZ", "charge": 1.0},
    {"selection": "resname LYS and name NZ", "charge": 1.0},
    {"selection": "resname ASP and name CG", "charge": -1.0},
    {"selection": "resname GLU and name CD", "charge": -1.0},
    # Partial charges
    {"selection": "backbone and name C", "charge": 0.4},
    {"selection": "backbone and name O", "charge": -0.4},
    {"selection": "backbone and name N", "charge": -0.3},
    {"selection": "backbone and name H", "charge": 0.3},
]

charges = charges_from_selections(sys, charge_rules, default=0.0)
Example: Water Charges (TIP3P)
# Assign TIP3P water charges
water_charges = charges_from_selections(sys, [
    {"selection": "resname HOH and name O", "charge": -0.834},
    {"selection": "resname HOH and name H1", "charge": 0.417},
    {"selection": "resname HOH and name H2", "charge": 0.417},
])

charges_from_table

Load charges from a CSV/TSV file.
def charges_from_table(
    system: System,
    path: str,
    delimiter: str | None = None,
    default: float = 0.0
) -> List[float]:
    """Load charges from tabular file.
    
    Args:
        system: Molecular system
        path: Path to CSV/TSV file
        delimiter: Column delimiter (auto-detected if None)
        default: Default charge for unmatched atoms
        
    Returns:
        List of charges (one per atom)
        
    Raises:
        ValueError: If required columns missing
    """
system
System
required
warp-md System object
path
str
required
Path to charge table file (CSV or TSV)
delimiter
str | None
Column delimiter ("," for CSV, "\t" for TSV). Auto-detected if None.
default
float
default:"0.0"
Default charge for atoms not in table
Required columns (case-insensitive):
  • Residue name: resname, residue, or res
  • Atom name: name, atom, atom_name, or atomname
  • Charge: charge or q
Example Table (charges.csv):
resname,name,charge
ALA,N,-0.415
ALA,H,0.271
ALA,CA,0.033
ALA,HA,0.082
ALA,CB,-0.182
ALA,HB1,0.060
ALA,HB2,0.060
ALA,HB3,0.060
ALA,C,0.597
ALA,O,-0.567
Example Usage:
import warp_md as wp
from warp_md.builder import charges_from_table

sys = wp.System("protein.pdb")

# Load charges from CSV
charges = charges_from_table(sys, "charges.csv")

print(f"Assigned {len(charges)} charges")
print(f"Net charge: {sum(charges):.2f}")
Example: TSV File
# Load from tab-separated file
charges = charges_from_table(
    sys,
    "charges.tsv",
    delimiter="\t"  # Explicit TSV
)
Example: Force Field Charges
# Load AMBER charges
charges = charges_from_table(
    sys,
    "amber_charges.csv",
    default=0.0
)

# Verify neutrality
net_charge = sum(charges)
if abs(net_charge) > 0.001:
    print(f"Warning: System net charge is {net_charge:.3f}")

Complete Example

import warp_md as wp
from warp_md.builder import (
    group_indices,
    group_types_from_selections,
    charges_from_selections
)

# Load system
sys = wp.System("protein.pdb")

# Group residues
protein = sys.select("protein")
residue_groups = group_indices(sys, protein, group_by="resid")
print(f"Protein has {len(residue_groups)} residues")

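# Classify residues by type. Every residue must match one of the selections,
# otherwise group_types_from_selections raises ValueError.
type_selections = [
    "resname ARG LYS ASP GLU HIS",  # Charged
    "resname SER THR ASN GLN CYS",  # Polar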
    "resname ALA VAL LEU ILE PHE TRP TYR MET PRO GLY",  # Hydrophobic
]

residue_types = group_types_from_selections(
    sys, protein, "resid", type_selections
)

type_names = ["Charged", "Polar", "Hydrophobic"]
for name, count in zip(type_names, [residue_types.count(i) for i in range(3)]):
    print(f"{name}: {count} residues")

# Assign charges
charge_rules = [
    {"selection": "resname ARG and name CZ", "charge": 1.0},
    {"selection": "resname LYS and name NZ", "charge": 1.0},
    {"selection": "resname ASP and name CG", "charge": -1.0},
    {"selection": "resname GLU and name CD", "charge": -1.0},
]

charges = charges_from_selections(sys, charge_rules, default=0.0)
net_charge = sum(charges)
print(f"Net charge: {net_charge:.1f}")

# Analyze charge distribution
for type_idx, type_name in enumerate(type_names):
    type_residues = [i for i, t in enumerate(residue_types) if t == type_idx]
    type_atoms = [atom for res_idx in type_residues for atom in residue_groups[res_idx]]
    type_charge = sum(charges[i] for i in type_atoms)
    print(f"{type_name} total charge: {type_charge:.1f}")

Advanced Patterns

Custom Atom Grouping

import warp_md as wp
from warp_md.builder import group_indices

sys = wp.System("complex.pdb")

# Get all CA atoms grouped by chain
ca_atoms = sys.select("name CA")
ca_by_chain = group_indices(sys, ca_atoms, group_by="chain")

# Calculate chain lengths
atoms = sys.atom_table()
for chain_idx, ca_indices in enumerate(ca_by_chain):
    chain_id = atoms["chain_id"][ca_indices[0]]
    print(f"Chain {chain_id}: {len(ca_indices)} residues")

Conditional Charge Assignment

from warp_md.builder import charges_from_selections

# Assign pH-dependent charges
ph = 7.0

if ph < 6.0:
    # Protonated histidine
    his_charge = 1.0
else:
    # Neutral histidine
    his_charge = 0.0

charge_rules = [
    {"selection": "resname HIS and name NE2", "charge": his_charge},
    {"selection": "resname ARG and name CZ", "charge": 1.0},
    {"selection": "resname LYS and name NZ", "charge": 1.0},
    {"selection": "resname ASP and name CG", "charge": -1.0},
    {"selection": "resname GLU and name CD", "charge": -1.0},
]

charges = charges_from_selections(sys, charge_rules)

Merging Charge Sources

from warp_md.builder import charges_from_table, charges_from_selections

# Start with force field charges
charges = charges_from_table(sys, "ff_charges.csv", default=0.0)

# Override specific atoms
overrides = [
    {"selection": "resid 42 and name CA", "charge": 0.5},  # Custom modification
]

for override in overrides:
    sel = sys.select(override["selection"])
    for idx in sel.indices:
        charges[idx] = override["charge"]

print(f"Final net charge: {sum(charges):.2f}")

See Also

Build docs developers (and LLMs) love