The Dictizable Pattern in Python
The Dictizable pattern is one I’ve used quite often while developing in Python. Here’s a quick overview of it:
The Class
Say we have information about users and we want to create a class to store this information nicely.
We could end up with something like this:
from dataclasses import dataclass
from typing import Optional
@dataclass
class Address:
    """
    Postal address of a user. Only the second address line is optional.
    """

    city: str
    state: str
    zip_code: str  # stored as text, not as a number
    address_line_1: str
    # Falls back to None when no second line is given.
    address_line_2: Optional[str] = None
@dataclass
class User:
    """
    A user, made up of a username and one nested Address.
    """

    username: str
    address: Address
Here we have a User object which has an Address object within it.
We can instantiate a User object and use it in our code wherever we’d like.
from model.Address import Address
from model.User import User
# address_line_2 is left out on purpose and keeps its default of None.
address = Address(
    city="New York",
    state="New York",
    zip_code="10040",
    address_line_1="57 Ivy Drive",
)
user = User(username="joeyagreco", address=address)
The Database
We can save and retrieve this User object to and from a NoSQL database.
Let’s see what that would look like:
from model.Address import Address
from model.User import User
from pymongo import MongoClient
from typing import Optional
def save_user_to_mongo(*, db_name: str, collection_name: str, user: User):
    """
    Converts the given User (and its nested Address) into a dictionary and
    inserts it into a Mongo collection on localhost:27017.

    db_name: name of the database to write to.
    collection_name: name of the collection inside that database.
    user: the User to save.
    """
    # convert User/Address to dictionary
    address_dict = {
        "city": user.address.city,
        "state": user.address.state,
        "zip_code": user.address.zip_code,
        "address_line_1": user.address.address_line_1,
        "address_line_2": user.address.address_line_2,
    }
    user_dict = {"username": user.username, "address": address_dict}
    # save to mongo; the `with` block closes the client (and its connections)
    # once the insert has finished instead of leaking it on every call
    with MongoClient("localhost", 27017) as client:
        collection = client[db_name][collection_name]
        collection.insert_one(user_dict)
def retrieve_user_from_mongo(
    *, db_name: str, collection_name: str, username: str
) -> Optional[User]:
    """
    Looks up a single user document by username in a Mongo collection on
    localhost:27017 and converts it into a User.

    db_name: name of the database to read from.
    collection_name: name of the collection inside that database.
    username: value matched against the document's "username" field.

    Returns the matching User, or None when no document was found.
    """
    # retrieve from mongo; the `with` block closes the client when the lookup is done
    with MongoClient("localhost", 27017) as client:
        collection = client[db_name][collection_name]
        user_data = collection.find_one({"username": username})
    if not user_data:
        return None
    # convert dictionary to User/Address
    address_data = user_data["address"]
    return User(
        username=user_data["username"],
        # Address is its own class, not an attribute of User
        address=Address(
            city=address_data["city"],
            state=address_data["state"],
            zip_code=address_data["zip_code"],
            address_line_1=address_data["address_line_1"],
            # the second address line is optional, so tolerate documents without it
            address_line_2=address_data.get("address_line_2"),
        ),
    )
# save an existing User instance
save_user_to_mongo(
    db_name="mydatabase",
    collection_name="mycollection",
    user=my_user_obj,
)
# retrieve it again by username
user: User = retrieve_user_from_mongo(
    db_name="mydatabase",
    collection_name="mycollection",
    username="joeyagreco",
)
We need the User object in dictionary form before we can save it to the Mongo database, so we have a custom function that takes a User object and converts it before saving it, and a second function that converts the retrieved dictionary back into a User object.
This is less than ideal as our codebase grows and we add more custom classes that we’d like to save and retrieve to and from our database. Each time we need to save and retrieve a new type of class, we’ll need new logic to convert the class to and from a dictionary.
The Dictizable Pattern
A clean solution to this I’ve implemented many times is to create a few interfaces that structure how our classes should look.
Here is an interface DictDeserializable that can be implemented by classes that should contain a from_dict() method, which is responsible for taking a dictionary and turning it into an instance of the class.
from __future__ import annotations
from abc import ABC, abstractmethod
class DictDeserializable(ABC):
    """
    Interface for classes that can be created from a dictionary.
    """

    @staticmethod
    @abstractmethod
    def from_dict(d: dict) -> DictDeserializable:
        """
        Builds an instance of the implementing class from the given dict and returns it.
        """
Here is an interface DictSerializable that can be implemented by classes that should contain a to_dict() method, which is responsible for turning itself into a dictionary.
from abc import ABC, abstractmethod
class DictSerializable(ABC):
    """
    Interface for classes that can represent themselves as a dictionary.
    """

    @abstractmethod
    def to_dict(self) -> dict:
        """
        Returns the dictionary representation of *this* instance.
        """
For classes that we want to contain both of these functionalities, we can wrap both of these interfaces into a single interface: Dictizable.
from __future__ import annotations
from model.abstract.DictDeserializable import DictDeserializable
from model.abstract.DictSerializable import DictSerializable
class Dictizable(DictSerializable, DictDeserializable):
    """
    Combined interface for classes that provide both to_dict() and from_dict().
    """
Applying the Pattern
Let’s rewrite our User class using this pattern:
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
from model.abstract.Dictizable import Dictizable
@dataclass
class User(Dictizable):
    """
    A user with a username and a nested Address, convertible to and from a dictionary.
    """

    username: str
    address: Address

    def to_dict(self) -> dict:
        """
        Returns this user as a dictionary, delegating the address to Address.to_dict().
        """
        serialized = {"username": self.username}
        serialized["address"] = self.address.to_dict()
        return serialized

    @staticmethod
    def from_dict(d: dict) -> User:
        """
        Builds a User from a dictionary with "username" and "address" keys,
        delegating the nested address to Address.from_dict().
        """
        username = d["username"]
        address = Address.from_dict(d["address"])
        return User(username=username, address=address)
@dataclass
class Address(Dictizable):
    """
    Postal address, convertible to and from a dictionary.
    Only the second address line is optional.
    """

    city: str
    state: str
    zip_code: str
    address_line_1: str
    address_line_2: Optional[str] = None

    def to_dict(self) -> dict:
        """
        Returns this address as a dictionary with one key per field.
        address_line_2 is always included, even when it is None.
        """
        return dict(
            city=self.city,
            state=self.state,
            zip_code=self.zip_code,
            address_line_1=self.address_line_1,
            address_line_2=self.address_line_2,
        )

    @staticmethod
    def from_dict(d: dict) -> Address:
        """
        Builds an Address from a dictionary.
        Raises KeyError if a required key is missing; "address_line_2" may be absent.
        """
        required_keys = ("city", "state", "zip_code", "address_line_1")
        required = {key: d[key] for key in required_keys}
        return Address(**required, address_line_2=d.get("address_line_2"))
One of the great parts about this pattern is its recursive nature.
We can have as many nested classes within a class as we want and as long as they all inherit the Dictizable class, they will always have the proper methods to call to serialize and deserialize the data.
# our user in dictionary form
user_dict = {
    "username": "joeyagreco",
    "address": {
        "city": "New York",
        "state": "New York",
        "zip_code": "10040",
        "address_line_1": "57 Ivy Drive",
        # to_dict() always emits this key (as None when unset), so it has to be
        # present here for the round-trip comparison below to hold
        "address_line_2": None,
    },
}
# our user as an instance of the User class
user_obj = User.from_dict(user_dict)
# our user back into dictionary form
user_dict_2 = user_obj.to_dict()
# True
assert user_dict == user_dict_2
We can swap between a concrete class and a dictionary simply by calling these methods!
Back to the Database
Let’s see what our database interaction looks like now with this Dictizable pattern applied:
from typing import Optional

from pymongo import MongoClient

from model.User import User
from model.abstract.Dictizable import Dictizable
def save_to_mongo(*, db_name: str, collection_name: str, obj: Dictizable):
    """
    Converts the given object to a dictionary via to_dict() and inserts it
    into a Mongo collection on localhost:27017.

    db_name: name of the database to write to.
    collection_name: name of the collection inside that database.
    obj: the object to save.
    """
    # save to mongo; the `with` block closes the client once the insert has finished
    with MongoClient("localhost", 27017) as client:
        collection = client[db_name][collection_name]
        # obj is of type Dictizable, so we know it will have a to_dict() method
        collection.insert_one(obj.to_dict())
def retrieve_from_mongo(
    *, db_name: str, collection_name: str, filters: dict, to_class: type[Dictizable]
) -> Optional[Dictizable]:
    """
    Finds a single document in a Mongo collection on localhost:27017 and
    converts it into an instance of the given class.

    db_name: name of the database to read from.
    collection_name: name of the collection inside that database.
    filters: query passed to find_one() to select the document.
    to_class: the Dictizable class (not an instance) whose from_dict() builds the result.

    Returns the converted object, or None when no document matched.
    """
    # retrieve from mongo; the `with` block closes the client when the lookup is done
    with MongoClient("localhost", 27017) as client:
        collection = client[db_name][collection_name]
        data = collection.find_one(filters)
    if not data:
        return None
    # data is a dictionary and we've given a Dictizable class, which will have a from_dict() method
    return to_class.from_dict(data)
# save any Dictizable instance
save_to_mongo(
    db_name="mydatabase",
    collection_name="mycollection",
    obj=my_user_obj,
)
# retrieve it again, naming the class the document should be converted into
user: User = retrieve_from_mongo(
    db_name="mydatabase",
    collection_name="mycollection",
    filters={"username": "joeyagreco"},
    to_class=User,
)
Summary
The Dictizable pattern gives classes a clear way to define how their data is converted to and from a dictionary. This is especially useful when we want to move instances of a concrete class to and from a database.
It has some clear advantages:
- Keeps logic about the class within the class
- Gives classes an interface that can be referenced using types everywhere in our code
- Makes nested objects much easier to deal with
Bonus: But What About SQL?
You can also use this pattern when dealing with SQL databases:
from model.abstract.DictSerializable import DictSerializable
from typing import Any
class BaseSqlRepository:
    """
    Base class for repositories that build SQL for one schema-qualified table.
    """

    def __init__(self, *, schema: str, table: str):
        """
        schema: schema name used in generated queries.
        table: table name used in generated queries.
        """
        self._schema = schema
        self._table = table

    def _build_insert_query(self, *, rows: list[DictSerializable]) -> tuple[str, list[Any]]:
        """
        Builds a query to insert the given rows.
        Returns the query and the params for the query.

        The column list comes from the keys of the first row's to_dict(), and
        every row's values are emitted in that same column order.

        Raises ValueError if rows is empty or if the rows do not all share the same keys.
        """
        if not rows:
            # an INSERT without any VALUES tuple is not valid SQL
            raise ValueError("rows must contain at least one item to build an INSERT query")
        rows_dict = [row.to_dict() for row in rows]
        first_row = rows_dict[0]
        columns = list(first_row)
        for row in rows_dict[1:]:
            if row.keys() != first_row.keys():
                raise ValueError("all rows must have the same keys to be inserted together")
        placeholders = ", ".join(["%s"] * len(columns))
        values = ", ".join([f"({placeholders})"] * len(rows_dict))
        # schema, table and column names are interpolated directly; only the values are parameterized
        query = f"INSERT INTO {self._schema}.{self._table} ({', '.join(columns)}) VALUES {values};"
        # read each value by column name so params always line up with the column list,
        # even when a row's dictionary lists its keys in a different order
        params = [row[column] for row in rows_dict for column in columns]
        return query, params
And use it like so:
from typing import Optional
from model.User import User
from repository.BaseSqlRepository import BaseSqlRepository
from repository.PostgresRepository import PostgresRespository
class UserRepository(BaseSqlRepository, PostgresRespository):
    """
    Repository for User rows stored in the "user" table of the "public" schema.
    """

    def __init__(self, connection_string: str):
        """
        connection_string: passed through to PostgresRespository.
        """
        # the two bases take different keyword arguments, so each initializer is called explicitly
        BaseSqlRepository.__init__(self, schema="public", table="user")
        PostgresRespository.__init__(self, connection_string=connection_string)

    def insert_users(self, users: list[User]) -> None:
        """
        Builds a single INSERT query for the given users and executes it.
        """
        query, values = self._build_insert_query(rows=users)
        # NOTE(review): _execute_query is not defined in this class or in BaseSqlRepository;
        # presumably it comes from PostgresRespository - confirm
        self._execute_query(query, values)

    def get_users(self, limit: Optional[int] = None) -> list[User]:
        """
        Returns users from this repository's table.

        limit: forwarded to _get_rows; None is the default.
        """
        # NOTE(review): _get_rows is presumably provided by PostgresRespository - confirm
        return self._get_rows(
            schema=self._schema, table=self._table, obj_class=User, limit=limit
        )
Note: with this SQL example, you typically do not want to have nested objects within an object that represents a row in a SQL database. So in this case, we would likely flatten the address info into the User object OR update our to_dict() and from_dict() methods to accommodate this.