Overview
Lodum provides native serialization support for popular data science libraries:
NumPy : ndarray serialization
Pandas : DataFrame and Series serialization
Polars : DataFrame and Series serialization
These extensions are registered automatically when lodum is imported, provided the corresponding library is installed.
Extensions require the respective libraries to be installed. Install only the extras you need: pip install lodum[numpy]
pip install lodum[pandas]
pip install lodum[polars]
# Or install all at once
pip install lodum[all]
NumPy Support
Arrays
NumPy arrays are serialized as nested lists:
import numpy as np
from lodum import lodum, json
@lodum
class DataContainer :
def __init__ ( self , values : np.ndarray, metadata : str ):
self .values = values
self .metadata = metadata
# Create with numpy array
data = DataContainer(
values = np.array([[ 1 , 2 , 3 ], [ 4 , 5 , 6 ]]),
metadata = "sensor_readings"
)
# Serialize to JSON
json_str = json.dumps(data)
print (json_str)
# {"values": [[1, 2, 3], [4, 5, 6]], "metadata": "sensor_readings"}
# Deserialize back
restored = json.loads(DataContainer, json_str)
assert isinstance (restored.values, np.ndarray)
assert np.array_equal(restored.values, data.values)
How It Works
The NumPy extension uses ndarray.tolist() for serialization:
# From src/lodum/extensions/numpy.py
def _dump_numpy_array ( obj : Any, dumper : Dumper, depth : int , seen : Optional[ set ]) -> Any:
from ..internal import dump
return dump(obj.tolist(), dumper, depth + 1 , seen)
def _load_numpy_array (
cls : Type[Any], loader : Loader, path : Optional[ str ] = None , depth : int = 0
) -> Any:
from ..internal import load
return np.array(load(List, loader, path, depth + 1 ))
Multidimensional Arrays
import numpy as np
from lodum import json
# 3D array
array_3d = np.array([
[[ 1 , 2 ], [ 3 , 4 ]],
[[ 5 , 6 ], [ 7 , 8 ]]
])
json_str = json.dumps(array_3d)
# [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
restored = json.loads(np.ndarray, json_str)
assert restored.shape == ( 2 , 2 , 2 )
Data Types
NumPy numeric values round-trip through JSON conversion, but exact dtypes are not guaranteed to survive (see the note below):
import numpy as np
from lodum import json
# Integer arrays
int_array = np.array([ 1 , 2 , 3 ], dtype = np.int32)
json_str = json.dumps(int_array)
restored = json.loads(np.ndarray, json_str)
# Float arrays
float_array = np.array([ 1.5 , 2.5 , 3.5 ], dtype = np.float64)
json_str = json.dumps(float_array)
restored = json.loads(np.ndarray, json_str)
Specialized NumPy dtypes (like datetime64 or custom structured types) are converted to Python primitives during serialization. The restored array will use NumPy’s default dtype inference.
Pandas Support
DataFrames
Pandas DataFrames are serialized using the records orientation:
import pandas as pd
from lodum import lodum, json
@lodum
class Dataset :
def __init__ ( self , name : str , data : pd.DataFrame):
self .name = name
self .data = data
# Create DataFrame
df = pd.DataFrame({
'id' : [ 1 , 2 , 3 ],
'name' : [ 'Alice' , 'Bob' , 'Charlie' ],
'score' : [ 85.5 , 90.0 , 78.5 ]
})
dataset = Dataset( name = "test_scores" , data = df)
# Serialize
json_str = json.dumps(dataset)
print (json_str)
# {
# "name": "test_scores",
# "data": [
# {"id": 1, "name": "Alice", "score": 85.5},
# {"id": 2, "name": "Bob", "score": 90.0},
# {"id": 3, "name": "Charlie", "score": 78.5}
# ]
# }
# Deserialize
restored = json.loads(Dataset, json_str)
assert isinstance (restored.data, pd.DataFrame)
assert len (restored.data) == 3
assert restored.data[ 'name' ].tolist() == [ 'Alice' , 'Bob' , 'Charlie' ]
How DataFrames Are Serialized
# From src/lodum/extensions/pandas.py
def _dump_pandas_dataframe (
obj : Any, dumper : Dumper, depth : int , seen : Optional[ set ]
) -> Any:
from ..internal import dump
return dump(obj.to_dict( orient = "records" ), dumper, depth + 1 , seen)
def _load_pandas_dataframe (
cls : Type[Any], loader : Loader, path : Optional[ str ] = None , depth : int = 0
) -> Any:
from ..internal import load
data = load(List[Dict[ str , Any]], loader, path, depth + 1 )
return pd.DataFrame.from_records(data)
Series
Pandas Series are serialized as dictionaries:
import pandas as pd
from lodum import lodum, json
@lodum
class Measurements :
def __init__ ( self , values : pd.Series):
self .values = values
# Create Series
series = pd.Series([ 10 , 20 , 30 ], index = [ 'a' , 'b' , 'c' ])
measurements = Measurements( values = series)
# Serialize
json_str = json.dumps(measurements)
print (json_str)
# {"values": {"a": 10, "b": 20, "c": 30}}
# Deserialize
restored = json.loads(Measurements, json_str)
assert isinstance (restored.values, pd.Series)
assert restored.values[ 'a' ] == 10
Complex DataFrames
import pandas as pd
from datetime import datetime
from lodum import json
# DataFrame with datetime and mixed types
df = pd.DataFrame({
'timestamp' : [datetime( 2024 , 1 , 1 ), datetime( 2024 , 1 , 2 )],
'value' : [ 100.5 , 200.7 ],
'status' : [ 'active' , 'inactive' ]
})
json_str = json.dumps(df)
restored = json.loads(pd.DataFrame, json_str)
# Note: datetime strings need manual conversion
restored[ 'timestamp' ] = pd.to_datetime(restored[ 'timestamp' ])
Polars Support
DataFrames
Polars DataFrames are serialized as dictionaries (column-oriented):
import polars as pl
from lodum import lodum, json
@lodum
class PolarsData :
def __init__ ( self , name : str , data : pl.DataFrame):
self .name = name
self .data = data
# Create Polars DataFrame
df = pl.DataFrame({
'id' : [ 1 , 2 , 3 ],
'value' : [ 10.5 , 20.3 , 30.1 ],
'category' : [ 'A' , 'B' , 'A' ]
})
data = PolarsData( name = "experiment" , data = df)
# Serialize
json_str = json.dumps(data)
print (json_str)
# {
# "name": "experiment",
# "data": {
# "id": [1, 2, 3],
# "value": [10.5, 20.3, 30.1],
# "category": ["A", "B", "A"]
# }
# }
# Deserialize
restored = json.loads(PolarsData, json_str)
assert isinstance (restored.data, pl.DataFrame)
assert restored.data.height == 3
assert restored.data[ 'id' ].to_list() == [ 1 , 2 , 3 ]
How Polars DataFrames Are Serialized
# From src/lodum/extensions/polars.py
def _dump_polars_dataframe (
obj : Any, dumper : Dumper, depth : int , seen : Optional[ set ]
) -> Any:
from ..internal import dump
return dump(obj.to_dict(), dumper, depth + 1 , seen)
def _load_polars_dataframe (
cls : Type[Any], loader : Loader, path : Optional[ str ] = None , depth : int = 0
) -> Any:
from ..internal import load
return pl.DataFrame(load( dict , loader, path, depth + 1 ))
Series
Polars Series are serialized as lists:
import polars as pl
from lodum import lodum, json
@lodum
class Sequence :
def __init__ ( self , values : pl.Series):
self .values = values
# Create Series
series = pl.Series( "numbers" , [ 1 , 2 , 3 , 4 , 5 ])
seq = Sequence( values = series)
# Serialize
json_str = json.dumps(seq)
print (json_str)
# {"values": [1, 2, 3, 4, 5]}
# Deserialize
restored = json.loads(Sequence, json_str)
assert isinstance (restored.values, pl.Series)
assert restored.values.to_list() == [ 1 , 2 , 3 , 4 , 5 ]
Extension Registration
Extensions are automatically registered on import:
# From src/lodum/__init__.py
try :
from .extensions import numpy as ext_numpy
ext_numpy.register()
except ImportError :
pass
try :
from .extensions import pandas as ext_pandas
ext_pandas.register()
except ImportError :
pass
try :
from .extensions import polars as ext_polars
ext_polars.register()
except ImportError :
pass
This means extensions “just work” when the libraries are installed - no additional configuration needed.
Custom Extensions
You can create your own type handlers using the same pattern:
from lodum.registry import TypeHandler, registry
from lodum.core import Dumper, Loader
from typing import Any, Dict, Optional, Type
# Define your custom type
class CustomArray :
def __init__ ( self , data : list ):
self .data = data
# Define handlers
def dump_custom_array ( obj : Any, dumper : Dumper, depth : int , seen : Optional[ set ]) -> Any:
from lodum.internal import dump
return dump(obj.data, dumper, depth + 1 , seen)
def load_custom_array (
cls : Type[Any], loader : Loader, path : Optional[ str ] = None , depth : int = 0
) -> Any:
from lodum.internal import load
data = load( list , loader, path, depth + 1 )
return CustomArray(data)
def schema_custom_array (
t : Type[Any], depth : int , visited : Optional[ set ]
) -> Dict[ str , Any]:
return { "type" : "array" }
# Register the handler
registry.register(
CustomArray,
TypeHandler(dump_custom_array, load_custom_array, schema_custom_array)
)
All extensions work seamlessly across all Lodum formats:
from lodum import json
import numpy as np
array = np.array([ 1 , 2 , 3 ])
json_str = json.dumps(array)
NumPy arrays are serialized via tolist(), which materializes a full Python list copy. For very large arrays, consider:
Using binary formats (MsgPack, CBOR) for better performance
Splitting large arrays into chunks
Using specialized formats like HDF5 for persistence
The records orientation produces one dictionary per row, which adds per-row overhead. For large DataFrames:
Consider using Parquet or Feather for efficient storage
Use streaming serialization for memory efficiency
Filter unnecessary columns before serialization
Polars uses column-oriented serialization which is efficient for:
DataFrames with many rows but few columns
Homogeneous data types per column
Direct integration with columnar formats
Next Steps
Basic Usage Return to basic usage patterns
Advanced Features Explore field customization and validation