Initial commit

This commit is contained in:
2025-11-18 03:36:49 +08:00
commit d17c7efb3c
7078 changed files with 831480 additions and 0 deletions

View File

@@ -0,0 +1,254 @@
"""
Interfaces for serializing Django objects.
Usage::
from django.core import serializers
json = serializers.serialize("json", some_queryset)
objects = list(serializers.deserialize("json", json))
To add your own serializers, use the SERIALIZATION_MODULES setting::
SERIALIZATION_MODULES = {
"csv": "path.to.csv.serializer",
"txt": "path.to.txt.serializer",
}
"""
import importlib
from django.apps import apps
from django.conf import settings
from django.core.serializers.base import SerializerDoesNotExist
# Built-in serializers
BUILTIN_SERIALIZERS = {
"xml": "django.core.serializers.xml_serializer",
"python": "django.core.serializers.python",
"json": "django.core.serializers.json",
"yaml": "django.core.serializers.pyyaml",
"jsonl": "django.core.serializers.jsonl",
}
_serializers = {}
class BadSerializer:
"""
Stub serializer to hold exception raised during registration
This allows the serializer registration to cache serializers and if there
is an error raised in the process of creating a serializer it will be
raised and passed along to the caller when the serializer is used.
"""
internal_use_only = False
def __init__(self, exception):
self.exception = exception
def __call__(self, *args, **kwargs):
raise self.exception
def register_serializer(format, serializer_module, serializers=None):
"""Register a new serializer.
``serializer_module`` should be the fully qualified module name
for the serializer.
If ``serializers`` is provided, the registration will be added
to the provided dictionary.
If ``serializers`` is not provided, the registration will be made
directly into the global register of serializers. Adding serializers
directly is not a thread-safe operation.
"""
if serializers is None and not _serializers:
_load_serializers()
try:
module = importlib.import_module(serializer_module)
except ImportError as exc:
bad_serializer = BadSerializer(exc)
module = type(
"BadSerializerModule",
(),
{
"Deserializer": bad_serializer,
"Serializer": bad_serializer,
},
)
if serializers is None:
_serializers[format] = module
else:
serializers[format] = module
def unregister_serializer(format):
"Unregister a given serializer. This is not a thread-safe operation."
if not _serializers:
_load_serializers()
if format not in _serializers:
raise SerializerDoesNotExist(format)
del _serializers[format]
def get_serializer(format):
if not _serializers:
_load_serializers()
if format not in _serializers:
raise SerializerDoesNotExist(format)
return _serializers[format].Serializer
def get_serializer_formats():
if not _serializers:
_load_serializers()
return list(_serializers)
def get_public_serializer_formats():
if not _serializers:
_load_serializers()
return [k for k, v in _serializers.items() if not v.Serializer.internal_use_only]
def get_deserializer(format):
if not _serializers:
_load_serializers()
if format not in _serializers:
raise SerializerDoesNotExist(format)
return _serializers[format].Deserializer
def serialize(format, queryset, **options):
"""
Serialize a queryset (or any iterator that returns database objects) using
a certain serializer.
"""
s = get_serializer(format)()
s.serialize(queryset, **options)
return s.getvalue()
def deserialize(format, stream_or_string, **options):
"""
Deserialize a stream or a string. Return an iterator that yields ``(obj,
m2m_relation_dict)``, where ``obj`` is an instantiated -- but *unsaved* --
object, and ``m2m_relation_dict`` is a dictionary of ``{m2m_field_name :
list_of_related_objects}``.
"""
d = get_deserializer(format)
return d(stream_or_string, **options)
def _load_serializers():
"""
Register built-in and settings-defined serializers. This is done lazily so
that user code has a chance to (e.g.) set up custom settings without
needing to be careful of import order.
"""
global _serializers
serializers = {}
for format in BUILTIN_SERIALIZERS:
register_serializer(format, BUILTIN_SERIALIZERS[format], serializers)
if hasattr(settings, "SERIALIZATION_MODULES"):
for format in settings.SERIALIZATION_MODULES:
register_serializer(
format, settings.SERIALIZATION_MODULES[format], serializers
)
_serializers = serializers
def sort_dependencies(app_list, allow_cycles=False):
"""Sort a list of (app_config, models) pairs into a single list of models.
The single list of models is sorted so that any model with a natural key
is serialized before a normal model, and any model with a natural key
dependency has it's dependencies serialized first.
If allow_cycles is True, return the best-effort ordering that will respect
most of dependencies but ignore some of them to break the cycles.
"""
# Process the list of models, and get the list of dependencies
model_dependencies = []
models = set()
for app_config, model_list in app_list:
if model_list is None:
model_list = app_config.get_models()
for model in model_list:
models.add(model)
# Add any explicitly defined dependencies
if hasattr(model, "natural_key"):
deps = getattr(model.natural_key, "dependencies", [])
if deps:
deps = [apps.get_model(dep) for dep in deps]
else:
deps = []
# Now add a dependency for any FK relation with a model that
# defines a natural key
for field in model._meta.fields:
if field.remote_field:
rel_model = field.remote_field.model
if hasattr(rel_model, "natural_key") and rel_model != model:
deps.append(rel_model)
# Also add a dependency for any simple M2M relation with a model
# that defines a natural key. M2M relations with explicit through
# models don't count as dependencies.
for field in model._meta.many_to_many:
if field.remote_field.through._meta.auto_created:
rel_model = field.remote_field.model
if hasattr(rel_model, "natural_key") and rel_model != model:
deps.append(rel_model)
model_dependencies.append((model, deps))
model_dependencies.reverse()
# Now sort the models to ensure that dependencies are met. This
# is done by repeatedly iterating over the input list of models.
# If all the dependencies of a given model are in the final list,
# that model is promoted to the end of the final list. This process
# continues until the input list is empty, or we do a full iteration
# over the input models without promoting a model to the final list.
# If we do a full iteration without a promotion, that means there are
# circular dependencies in the list.
model_list = []
while model_dependencies:
skipped = []
changed = False
while model_dependencies:
model, deps = model_dependencies.pop()
# If all of the models in the dependency list are either already
# on the final model list, or not on the original serialization list,
# then we've found another model with all it's dependencies satisfied.
if all(d not in models or d in model_list for d in deps):
model_list.append(model)
changed = True
else:
skipped.append((model, deps))
if not changed:
if allow_cycles:
# If cycles are allowed, add the last skipped model and ignore
# its dependencies. This could be improved by some graph
# analysis to ignore as few dependencies as possible.
model, _ = skipped.pop()
model_list.append(model)
else:
raise RuntimeError(
"Can't resolve dependencies for %s in serialized app list."
% ", ".join(
model._meta.label
for model, deps in sorted(
skipped, key=lambda obj: obj[0].__name__
)
),
)
model_dependencies = skipped
return model_list

View File

@@ -0,0 +1,387 @@
"""
Module for abstract serializer/unserializer base classes.
"""
from io import StringIO
from django.core.exceptions import ObjectDoesNotExist
from django.db import models
DEFER_FIELD = object()
class SerializerDoesNotExist(KeyError):
"""The requested serializer was not found."""
pass
class SerializationError(Exception):
"""Something bad happened during serialization."""
pass
class DeserializationError(Exception):
"""Something bad happened during deserialization."""
@classmethod
def WithData(cls, original_exc, model, fk, field_value):
"""
Factory method for creating a deserialization error which has a more
explanatory message.
"""
return cls(
"%s: (%s:pk=%s) field_value was '%s'"
% (original_exc, model, fk, field_value)
)
class M2MDeserializationError(Exception):
"""Something bad happened during deserialization of a ManyToManyField."""
def __init__(self, original_exc, pk):
self.original_exc = original_exc
self.pk = pk
class ProgressBar:
progress_width = 75
def __init__(self, output, total_count):
self.output = output
self.total_count = total_count
self.prev_done = 0
def update(self, count):
if not self.output:
return
perc = count * 100 // self.total_count
done = perc * self.progress_width // 100
if self.prev_done >= done:
return
self.prev_done = done
cr = "" if self.total_count == 1 else "\r"
self.output.write(
cr + "[" + "." * done + " " * (self.progress_width - done) + "]"
)
if done == self.progress_width:
self.output.write("\n")
self.output.flush()
class Serializer:
"""
Abstract serializer base class.
"""
# Indicates if the implemented serializer is only available for
# internal Django use.
internal_use_only = False
progress_class = ProgressBar
stream_class = StringIO
def serialize(
self,
queryset,
*,
stream=None,
fields=None,
use_natural_foreign_keys=False,
use_natural_primary_keys=False,
progress_output=None,
object_count=0,
**options,
):
"""
Serialize a queryset.
"""
self.options = options
self.stream = stream if stream is not None else self.stream_class()
self.selected_fields = fields
self.use_natural_foreign_keys = use_natural_foreign_keys
self.use_natural_primary_keys = use_natural_primary_keys
progress_bar = self.progress_class(progress_output, object_count)
self.start_serialization()
self.first = True
for count, obj in enumerate(queryset, start=1):
self.start_object(obj)
# Use the concrete parent class' _meta instead of the object's _meta
# This is to avoid local_fields problems for proxy models. Refs #17717.
concrete_model = obj._meta.concrete_model
# When using natural primary keys, retrieve the pk field of the
# parent for multi-table inheritance child models. That field must
# be serialized, otherwise deserialization isn't possible.
if self.use_natural_primary_keys:
pk = concrete_model._meta.pk
pk_parent = (
pk if pk.remote_field and pk.remote_field.parent_link else None
)
else:
pk_parent = None
for field in concrete_model._meta.local_fields:
if field.serialize or field is pk_parent:
if field.remote_field is None:
if (
self.selected_fields is None
or field.attname in self.selected_fields
):
self.handle_field(obj, field)
else:
if (
self.selected_fields is None
or field.attname[:-3] in self.selected_fields
):
self.handle_fk_field(obj, field)
for field in concrete_model._meta.local_many_to_many:
if field.serialize:
if (
self.selected_fields is None
or field.attname in self.selected_fields
):
self.handle_m2m_field(obj, field)
self.end_object(obj)
progress_bar.update(count)
self.first = self.first and False
self.end_serialization()
return self.getvalue()
def start_serialization(self):
"""
Called when serializing of the queryset starts.
"""
raise NotImplementedError(
"subclasses of Serializer must provide a start_serialization() method"
)
def end_serialization(self):
"""
Called when serializing of the queryset ends.
"""
pass
def start_object(self, obj):
"""
Called when serializing of an object starts.
"""
raise NotImplementedError(
"subclasses of Serializer must provide a start_object() method"
)
def end_object(self, obj):
"""
Called when serializing of an object ends.
"""
pass
def handle_field(self, obj, field):
"""
Called to handle each individual (non-relational) field on an object.
"""
raise NotImplementedError(
"subclasses of Serializer must provide a handle_field() method"
)
def handle_fk_field(self, obj, field):
"""
Called to handle a ForeignKey field.
"""
raise NotImplementedError(
"subclasses of Serializer must provide a handle_fk_field() method"
)
def handle_m2m_field(self, obj, field):
"""
Called to handle a ManyToManyField.
"""
raise NotImplementedError(
"subclasses of Serializer must provide a handle_m2m_field() method"
)
def getvalue(self):
"""
Return the fully serialized queryset (or None if the output stream is
not seekable).
"""
if callable(getattr(self.stream, "getvalue", None)):
return self.stream.getvalue()
class Deserializer:
"""
Abstract base deserializer class.
"""
def __init__(self, stream_or_string, **options):
"""
Init this serializer given a stream or a string
"""
self.options = options
if isinstance(stream_or_string, str):
self.stream = StringIO(stream_or_string)
else:
self.stream = stream_or_string
def __iter__(self):
return self
def __next__(self):
"""Iteration interface -- return the next item in the stream"""
raise NotImplementedError(
"subclasses of Deserializer must provide a __next__() method"
)
class DeserializedObject:
"""
A deserialized model.
Basically a container for holding the pre-saved deserialized data along
with the many-to-many data saved with the object.
Call ``save()`` to save the object (with the many-to-many data) to the
database; call ``save(save_m2m=False)`` to save just the object fields
(and not touch the many-to-many stuff.)
"""
def __init__(self, obj, m2m_data=None, deferred_fields=None):
self.object = obj
self.m2m_data = m2m_data
self.deferred_fields = deferred_fields
def __repr__(self):
return "<%s: %s(pk=%s)>" % (
self.__class__.__name__,
self.object._meta.label,
self.object.pk,
)
def save(self, save_m2m=True, using=None, **kwargs):
# Call save on the Model baseclass directly. This bypasses any
# model-defined save. The save is also forced to be raw.
# raw=True is passed to any pre/post_save signals.
models.Model.save_base(self.object, using=using, raw=True, **kwargs)
if self.m2m_data and save_m2m:
for accessor_name, object_list in self.m2m_data.items():
getattr(self.object, accessor_name).set(object_list)
# prevent a second (possibly accidental) call to save() from saving
# the m2m data twice.
self.m2m_data = None
def save_deferred_fields(self, using=None):
self.m2m_data = {}
for field, field_value in self.deferred_fields.items():
opts = self.object._meta
label = opts.app_label + "." + opts.model_name
if isinstance(field.remote_field, models.ManyToManyRel):
try:
values = deserialize_m2m_values(
field, field_value, using, handle_forward_references=False
)
except M2MDeserializationError as e:
raise DeserializationError.WithData(
e.original_exc, label, self.object.pk, e.pk
)
self.m2m_data[field.name] = values
elif isinstance(field.remote_field, models.ManyToOneRel):
try:
value = deserialize_fk_value(
field, field_value, using, handle_forward_references=False
)
except Exception as e:
raise DeserializationError.WithData(
e, label, self.object.pk, field_value
)
setattr(self.object, field.attname, value)
self.save()
def build_instance(Model, data, db):
"""
Build a model instance.
If the model instance doesn't have a primary key and the model supports
natural keys, try to retrieve it from the database.
"""
default_manager = Model._meta.default_manager
pk = data.get(Model._meta.pk.attname)
if (
pk is None
and hasattr(default_manager, "get_by_natural_key")
and hasattr(Model, "natural_key")
):
obj = Model(**data)
obj._state.db = db
natural_key = obj.natural_key()
try:
data[Model._meta.pk.attname] = Model._meta.pk.to_python(
default_manager.db_manager(db).get_by_natural_key(*natural_key).pk
)
except Model.DoesNotExist:
pass
return Model(**data)
def deserialize_m2m_values(field, field_value, using, handle_forward_references):
model = field.remote_field.model
if hasattr(model._default_manager, "get_by_natural_key"):
def m2m_convert(value):
if hasattr(value, "__iter__") and not isinstance(value, str):
return (
model._default_manager.db_manager(using)
.get_by_natural_key(*value)
.pk
)
else:
return model._meta.pk.to_python(value)
else:
def m2m_convert(v):
return model._meta.pk.to_python(v)
try:
pks_iter = iter(field_value)
except TypeError as e:
raise M2MDeserializationError(e, field_value)
try:
values = []
for pk in pks_iter:
values.append(m2m_convert(pk))
return values
except Exception as e:
if isinstance(e, ObjectDoesNotExist) and handle_forward_references:
return DEFER_FIELD
else:
raise M2MDeserializationError(e, pk)
def deserialize_fk_value(field, field_value, using, handle_forward_references):
if field_value is None:
return None
model = field.remote_field.model
default_manager = model._default_manager
field_name = field.remote_field.field_name
if (
hasattr(default_manager, "get_by_natural_key")
and hasattr(field_value, "__iter__")
and not isinstance(field_value, str)
):
try:
obj = default_manager.db_manager(using).get_by_natural_key(*field_value)
except ObjectDoesNotExist:
if handle_forward_references:
return DEFER_FIELD
else:
raise
value = getattr(obj, field_name)
# If this is a natural foreign key to an object that has a FK/O2O as
# the foreign key, use the FK value.
if model._meta.pk.remote_field:
value = value.pk
return value
return model._meta.get_field(field_name).to_python(field_value)

View File

@@ -0,0 +1,114 @@
"""
Serialize data to/from JSON
"""
import datetime
import decimal
import json
import uuid
from django.core.serializers.base import DeserializationError
from django.core.serializers.python import Deserializer as PythonDeserializer
from django.core.serializers.python import Serializer as PythonSerializer
from django.utils.duration import duration_iso_string
from django.utils.functional import Promise
from django.utils.timezone import is_aware
class Serializer(PythonSerializer):
"""Convert a queryset to JSON."""
internal_use_only = False
def _init_options(self):
self._current = None
self.json_kwargs = self.options.copy()
self.json_kwargs.pop("stream", None)
self.json_kwargs.pop("fields", None)
if self.options.get("indent"):
# Prevent trailing spaces
self.json_kwargs["separators"] = (",", ": ")
self.json_kwargs.setdefault("cls", DjangoJSONEncoder)
self.json_kwargs.setdefault("ensure_ascii", False)
def start_serialization(self):
self._init_options()
self.stream.write("[")
def end_serialization(self):
if self.options.get("indent"):
self.stream.write("\n")
self.stream.write("]")
if self.options.get("indent"):
self.stream.write("\n")
def end_object(self, obj):
# self._current has the field data
indent = self.options.get("indent")
if not self.first:
self.stream.write(",")
if not indent:
self.stream.write(" ")
if indent:
self.stream.write("\n")
json.dump(self.get_dump_object(obj), self.stream, **self.json_kwargs)
self._current = None
def getvalue(self):
# Grandparent super
return super(PythonSerializer, self).getvalue()
class Deserializer(PythonDeserializer):
"""Deserialize a stream or string of JSON data."""
def __init__(self, stream_or_string, **options):
if not isinstance(stream_or_string, (bytes, str)):
stream_or_string = stream_or_string.read()
if isinstance(stream_or_string, bytes):
stream_or_string = stream_or_string.decode()
try:
objects = json.loads(stream_or_string)
except Exception as exc:
raise DeserializationError() from exc
super().__init__(objects, **options)
def _handle_object(self, obj):
try:
yield from super()._handle_object(obj)
except (GeneratorExit, DeserializationError):
raise
except Exception as exc:
raise DeserializationError(f"Error deserializing object: {exc}") from exc
class DjangoJSONEncoder(json.JSONEncoder):
"""
JSONEncoder subclass that knows how to encode date/time, decimal types, and
UUIDs.
"""
def default(self, o):
# See "Date Time String Format" in the ECMA-262 specification.
if isinstance(o, datetime.datetime):
r = o.isoformat()
if o.microsecond:
r = r[:23] + r[26:]
if r.endswith("+00:00"):
r = r.removesuffix("+00:00") + "Z"
return r
elif isinstance(o, datetime.date):
return o.isoformat()
elif isinstance(o, datetime.time):
if is_aware(o):
raise ValueError("JSON can't represent timezone-aware times.")
r = o.isoformat()
if o.microsecond:
r = r[:12]
return r
elif isinstance(o, datetime.timedelta):
return duration_iso_string(o)
elif isinstance(o, (decimal.Decimal, uuid.UUID, Promise)):
return str(o)
else:
return super().default(o)

View File

@@ -0,0 +1,68 @@
"""
Serialize data to/from JSON Lines
"""
import json
from django.core.serializers.base import DeserializationError
from django.core.serializers.json import DjangoJSONEncoder
from django.core.serializers.python import Deserializer as PythonDeserializer
from django.core.serializers.python import Serializer as PythonSerializer
class Serializer(PythonSerializer):
"""Convert a queryset to JSON Lines."""
internal_use_only = False
def _init_options(self):
self._current = None
self.json_kwargs = self.options.copy()
self.json_kwargs.pop("stream", None)
self.json_kwargs.pop("fields", None)
self.json_kwargs.pop("indent", None)
self.json_kwargs["separators"] = (",", ": ")
self.json_kwargs.setdefault("cls", DjangoJSONEncoder)
self.json_kwargs.setdefault("ensure_ascii", False)
def start_serialization(self):
self._init_options()
def end_object(self, obj):
# self._current has the field data
json.dump(self.get_dump_object(obj), self.stream, **self.json_kwargs)
self.stream.write("\n")
self._current = None
def getvalue(self):
# Grandparent super
return super(PythonSerializer, self).getvalue()
class Deserializer(PythonDeserializer):
"""Deserialize a stream or string of JSON data."""
def __init__(self, stream_or_string, **options):
if isinstance(stream_or_string, bytes):
stream_or_string = stream_or_string.decode()
if isinstance(stream_or_string, str):
stream_or_string = stream_or_string.splitlines()
super().__init__(Deserializer._get_lines(stream_or_string), **options)
def _handle_object(self, obj):
try:
yield from super()._handle_object(obj)
except (GeneratorExit, DeserializationError):
raise
except Exception as exc:
raise DeserializationError(f"Error deserializing object: {exc}") from exc
@staticmethod
def _get_lines(stream):
for line in stream:
if not line.strip():
continue
try:
yield json.loads(line)
except Exception as exc:
raise DeserializationError() from exc

View File

@@ -0,0 +1,222 @@
"""
A Python "serializer". Doesn't do much serializing per se -- just converts to
and from basic Python data types (lists, dicts, strings, etc.). Useful as a basis for
other serializers.
"""
from django.apps import apps
from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models
from django.db.models import CompositePrimaryKey
from django.utils.encoding import is_protected_type
class Serializer(base.Serializer):
"""
Serialize a QuerySet to basic Python objects.
"""
internal_use_only = True
def start_serialization(self):
self._current = None
self.objects = []
def end_serialization(self):
pass
def start_object(self, obj):
self._current = {}
def end_object(self, obj):
self.objects.append(self.get_dump_object(obj))
self._current = None
def get_dump_object(self, obj):
data = {"model": str(obj._meta)}
if not self.use_natural_primary_keys or not hasattr(obj, "natural_key"):
data["pk"] = self._value_from_field(obj, obj._meta.pk)
data["fields"] = self._current
return data
def _value_from_field(self, obj, field):
if isinstance(field, CompositePrimaryKey):
return [self._value_from_field(obj, f) for f in field]
value = field.value_from_object(obj)
# Protected types (i.e., primitives like None, numbers, dates,
# and Decimals) are passed through as is. All other values are
# converted to string first.
return value if is_protected_type(value) else field.value_to_string(obj)
def handle_field(self, obj, field):
self._current[field.name] = self._value_from_field(obj, field)
def handle_fk_field(self, obj, field):
if self.use_natural_foreign_keys and hasattr(
field.remote_field.model, "natural_key"
):
related = getattr(obj, field.name)
if related:
value = related.natural_key()
else:
value = None
else:
value = self._value_from_field(obj, field)
self._current[field.name] = value
def handle_m2m_field(self, obj, field):
if field.remote_field.through._meta.auto_created:
if self.use_natural_foreign_keys and hasattr(
field.remote_field.model, "natural_key"
):
def m2m_value(value):
return value.natural_key()
def queryset_iterator(obj, field):
attr = getattr(obj, field.name)
chunk_size = (
2000 if getattr(attr, "prefetch_cache_name", None) else None
)
return attr.iterator(chunk_size)
else:
def m2m_value(value):
return self._value_from_field(value, value._meta.pk)
def queryset_iterator(obj, field):
query_set = getattr(obj, field.name).select_related(None).only("pk")
chunk_size = 2000 if query_set._prefetch_related_lookups else None
return query_set.iterator(chunk_size=chunk_size)
m2m_iter = getattr(obj, "_prefetched_objects_cache", {}).get(
field.name,
queryset_iterator(obj, field),
)
self._current[field.name] = [m2m_value(related) for related in m2m_iter]
def getvalue(self):
return self.objects
class Deserializer(base.Deserializer):
"""
Deserialize simple Python objects back into Django ORM instances.
It's expected that you pass the Python objects themselves (instead of a
stream or a string) to the constructor
"""
def __init__(
self, object_list, *, using=DEFAULT_DB_ALIAS, ignorenonexistent=False, **options
):
super().__init__(object_list, **options)
self.handle_forward_references = options.pop("handle_forward_references", False)
self.using = using
self.ignorenonexistent = ignorenonexistent
self.field_names_cache = {} # Model: <list of field_names>
self._iterator = None
def __iter__(self):
for obj in self.stream:
yield from self._handle_object(obj)
def __next__(self):
if self._iterator is None:
self._iterator = iter(self)
return next(self._iterator)
def _handle_object(self, obj):
data = {}
m2m_data = {}
deferred_fields = {}
# Look up the model and starting build a dict of data for it.
try:
Model = self._get_model_from_node(obj["model"])
except base.DeserializationError:
if self.ignorenonexistent:
return
raise
if "pk" in obj:
try:
data[Model._meta.pk.attname] = Model._meta.pk.to_python(obj.get("pk"))
except Exception as e:
raise base.DeserializationError.WithData(
e, obj["model"], obj.get("pk"), None
)
if Model not in self.field_names_cache:
self.field_names_cache[Model] = {f.name for f in Model._meta.get_fields()}
field_names = self.field_names_cache[Model]
# Handle each field
for field_name, field_value in obj["fields"].items():
if self.ignorenonexistent and field_name not in field_names:
# skip fields no longer on model
continue
field = Model._meta.get_field(field_name)
# Handle M2M relations
if field.remote_field and isinstance(
field.remote_field, models.ManyToManyRel
):
try:
values = self._handle_m2m_field_node(field, field_value)
if values == base.DEFER_FIELD:
deferred_fields[field] = field_value
else:
m2m_data[field.name] = values
except base.M2MDeserializationError as e:
raise base.DeserializationError.WithData(
e.original_exc, obj["model"], obj.get("pk"), e.pk
)
# Handle FK fields
elif field.remote_field and isinstance(
field.remote_field, models.ManyToOneRel
):
try:
value = self._handle_fk_field_node(field, field_value)
if value == base.DEFER_FIELD:
deferred_fields[field] = field_value
else:
data[field.attname] = value
except Exception as e:
raise base.DeserializationError.WithData(
e, obj["model"], obj.get("pk"), field_value
)
# Handle all other fields
else:
try:
data[field.name] = field.to_python(field_value)
except Exception as e:
raise base.DeserializationError.WithData(
e, obj["model"], obj.get("pk"), field_value
)
model_instance = base.build_instance(Model, data, self.using)
yield base.DeserializedObject(model_instance, m2m_data, deferred_fields)
def _handle_m2m_field_node(self, field, field_value):
return base.deserialize_m2m_values(
field, field_value, self.using, self.handle_forward_references
)
def _handle_fk_field_node(self, field, field_value):
return base.deserialize_fk_value(
field, field_value, self.using, self.handle_forward_references
)
@staticmethod
def _get_model_from_node(model_identifier):
"""Look up a model from an "app_label.model_name" string."""
try:
return apps.get_model(model_identifier)
except (LookupError, TypeError):
raise base.DeserializationError(
f"Invalid model identifier: {model_identifier}"
)

View File

@@ -0,0 +1,87 @@
"""
YAML serializer.
Requires PyYaml (https://pyyaml.org/), but that's checked for in __init__.
"""
import collections
import datetime
import decimal
import yaml
from django.core.serializers.base import DeserializationError
from django.core.serializers.python import Deserializer as PythonDeserializer
from django.core.serializers.python import Serializer as PythonSerializer
# Use the C (faster) implementation if possible
try:
from yaml import CSafeDumper as SafeDumper
from yaml import CSafeLoader as SafeLoader
except ImportError:
from yaml import SafeDumper, SafeLoader
class DjangoSafeDumper(SafeDumper):
def represent_decimal(self, data):
return self.represent_scalar("tag:yaml.org,2002:str", str(data))
def represent_ordered_dict(self, data):
return self.represent_mapping("tag:yaml.org,2002:map", data.items())
DjangoSafeDumper.add_representer(decimal.Decimal, DjangoSafeDumper.represent_decimal)
DjangoSafeDumper.add_representer(
collections.OrderedDict, DjangoSafeDumper.represent_ordered_dict
)
# Workaround to represent dictionaries in insertion order.
# See https://github.com/yaml/pyyaml/pull/143.
DjangoSafeDumper.add_representer(dict, DjangoSafeDumper.represent_ordered_dict)
class Serializer(PythonSerializer):
"""Convert a queryset to YAML."""
internal_use_only = False
def _value_from_field(self, obj, field):
# A nasty special case: base YAML doesn't support serialization of time
# types (as opposed to dates or datetimes, which it does support). Since
# we want to use the "safe" serializer for better interoperability, we
# need to do something with those pesky times. Converting 'em to strings
# isn't perfect, but it's better than a "!!python/time" type which would
# halt deserialization under any other language.
value = super()._value_from_field(obj, field)
if isinstance(value, datetime.time):
value = str(value)
return value
def end_serialization(self):
self.options.setdefault("allow_unicode", True)
yaml.dump(self.objects, self.stream, Dumper=DjangoSafeDumper, **self.options)
def getvalue(self):
# Grandparent super
return super(PythonSerializer, self).getvalue()
class Deserializer(PythonDeserializer):
"""Deserialize a stream or string of YAML data."""
def __init__(self, stream_or_string, **options):
stream = stream_or_string
if isinstance(stream_or_string, bytes):
stream = stream_or_string.decode()
try:
objects = yaml.load(stream, Loader=SafeLoader)
except Exception as exc:
raise DeserializationError() from exc
super().__init__(objects, **options)
def _handle_object(self, obj):
try:
yield from super()._handle_object(obj)
except (GeneratorExit, DeserializationError):
raise
except Exception as exc:
raise DeserializationError(f"Error deserializing object: {exc}") from exc

View File

@@ -0,0 +1,501 @@
"""
XML serializer.
"""
import json
from xml.dom import pulldom
from xml.sax import handler
from xml.sax.expatreader import ExpatParser as _ExpatParser
from django.apps import apps
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
class Serializer(base.Serializer):
"""Serialize a QuerySet to XML."""
def indent(self, level):
if self.options.get("indent") is not None:
self.xml.ignorableWhitespace(
"\n" + " " * self.options.get("indent") * level
)
def start_serialization(self):
"""
Start serialization -- open the XML document and the root element.
"""
self.xml = SimplerXMLGenerator(
self.stream, self.options.get("encoding", settings.DEFAULT_CHARSET)
)
self.xml.startDocument()
self.xml.startElement("django-objects", {"version": "1.0"})
def end_serialization(self):
"""
End serialization -- end the document.
"""
self.indent(0)
self.xml.endElement("django-objects")
self.xml.endDocument()
def start_object(self, obj):
"""
Called as each object is handled.
"""
if not hasattr(obj, "_meta"):
raise base.SerializationError(
"Non-model object (%s) encountered during serialization" % type(obj)
)
self.indent(1)
attrs = {"model": str(obj._meta)}
if not self.use_natural_primary_keys or not hasattr(obj, "natural_key"):
obj_pk = obj.pk
if obj_pk is not None:
attrs["pk"] = obj._meta.pk.value_to_string(obj)
self.xml.startElement("object", attrs)
def end_object(self, obj):
"""
Called after handling all fields for an object.
"""
self.indent(1)
self.xml.endElement("object")
def handle_field(self, obj, field):
"""
Handle each field on an object (except for ForeignKeys and
ManyToManyFields).
"""
self.indent(2)
self.xml.startElement(
"field",
{
"name": field.name,
"type": field.get_internal_type(),
},
)
# Get a "string version" of the object's data.
if getattr(obj, field.name) is not None:
value = field.value_to_string(obj)
if field.get_internal_type() == "JSONField":
# Dump value since JSONField.value_to_string() doesn't output
# strings.
value = json.dumps(value, cls=field.encoder)
try:
self.xml.characters(value)
except UnserializableContentError:
raise ValueError(
"%s.%s (pk:%s) contains unserializable characters"
% (obj.__class__.__name__, field.name, obj.pk)
)
else:
self.xml.addQuickElement("None")
self.xml.endElement("field")
def handle_fk_field(self, obj, field):
"""
Handle a ForeignKey (they need to be treated slightly
differently from regular fields).
"""
self._start_relational_field(field)
related_att = getattr(obj, field.attname)
if related_att is not None:
if self.use_natural_foreign_keys and hasattr(
field.remote_field.model, "natural_key"
):
related = getattr(obj, field.name)
# If related object has a natural key, use it
related = related.natural_key()
# Iterable natural keys are rolled out as subelements
for key_value in related:
self.xml.startElement("natural", {})
self.xml.characters(str(key_value))
self.xml.endElement("natural")
else:
self.xml.characters(str(related_att))
else:
self.xml.addQuickElement("None")
self.xml.endElement("field")
def handle_m2m_field(self, obj, field):
"""
Handle a ManyToManyField. Related objects are only serialized as
references to the object's PK (i.e. the related *data* is not dumped,
just the relation).
"""
if field.remote_field.through._meta.auto_created:
self._start_relational_field(field)
if self.use_natural_foreign_keys and hasattr(
field.remote_field.model, "natural_key"
):
# If the objects in the m2m have a natural key, use it
def handle_m2m(value):
natural = value.natural_key()
# Iterable natural keys are rolled out as subelements
self.xml.startElement("object", {})
for key_value in natural:
self.xml.startElement("natural", {})
self.xml.characters(str(key_value))
self.xml.endElement("natural")
self.xml.endElement("object")
def queryset_iterator(obj, field):
attr = getattr(obj, field.name)
chunk_size = (
2000 if getattr(attr, "prefetch_cache_name", None) else None
)
return attr.iterator(chunk_size)
else:
def handle_m2m(value):
self.xml.addQuickElement("object", attrs={"pk": str(value.pk)})
def queryset_iterator(obj, field):
query_set = getattr(obj, field.name).select_related(None).only("pk")
chunk_size = 2000 if query_set._prefetch_related_lookups else None
return query_set.iterator(chunk_size=chunk_size)
m2m_iter = getattr(obj, "_prefetched_objects_cache", {}).get(
field.name,
queryset_iterator(obj, field),
)
for relobj in m2m_iter:
handle_m2m(relobj)
self.xml.endElement("field")
def _start_relational_field(self, field):
"""Output the <field> element for relational fields."""
self.indent(2)
self.xml.startElement(
"field",
{
"name": field.name,
"rel": field.remote_field.__class__.__name__,
"to": str(field.remote_field.model._meta),
},
)
class Deserializer(base.Deserializer):
"""Deserialize XML."""
def __init__(
self,
stream_or_string,
*,
using=DEFAULT_DB_ALIAS,
ignorenonexistent=False,
**options,
):
super().__init__(stream_or_string, **options)
self.handle_forward_references = options.pop("handle_forward_references", False)
self.event_stream = pulldom.parse(self.stream, self._make_parser())
self.db = using
self.ignore = ignorenonexistent
def _make_parser(self):
"""Create a hardened XML parser (no custom/external entities)."""
return DefusedExpatParser()
def __next__(self):
for event, node in self.event_stream:
if event == "START_ELEMENT" and node.nodeName == "object":
self.event_stream.expandNode(node)
return self._handle_object(node)
raise StopIteration
def _handle_object(self, node):
"""Convert an <object> node to a DeserializedObject."""
# Look up the model using the model loading mechanism. If this fails,
# bail.
Model = self._get_model_from_node(node, "model")
# Start building a data dictionary from the object.
data = {}
if node.hasAttribute("pk"):
data[Model._meta.pk.attname] = Model._meta.pk.to_python(
node.getAttribute("pk")
)
# Also start building a dict of m2m data (this is saved as
# {m2m_accessor_attribute : [list_of_related_objects]})
m2m_data = {}
deferred_fields = {}
field_names = {f.name for f in Model._meta.get_fields()}
# Deserialize each field.
for field_node in node.getElementsByTagName("field"):
# If the field is missing the name attribute, bail (are you
# sensing a pattern here?)
field_name = field_node.getAttribute("name")
if not field_name:
raise base.DeserializationError(
"<field> node is missing the 'name' attribute"
)
# Get the field from the Model. This will raise a
# FieldDoesNotExist if, well, the field doesn't exist, which will
# be propagated correctly unless ignorenonexistent=True is used.
if self.ignore and field_name not in field_names:
continue
field = Model._meta.get_field(field_name)
# As is usually the case, relation fields get the special treatment.
if field.remote_field and isinstance(
field.remote_field, models.ManyToManyRel
):
value = self._handle_m2m_field_node(field_node, field)
if value == base.DEFER_FIELD:
deferred_fields[field] = [
[
getInnerText(nat_node).strip()
for nat_node in obj_node.getElementsByTagName("natural")
]
for obj_node in field_node.getElementsByTagName("object")
]
else:
m2m_data[field.name] = value
elif field.remote_field and isinstance(
field.remote_field, models.ManyToOneRel
):
value = self._handle_fk_field_node(field_node, field)
if value == base.DEFER_FIELD:
deferred_fields[field] = [
getInnerText(k).strip()
for k in field_node.getElementsByTagName("natural")
]
else:
data[field.attname] = value
else:
if field_node.getElementsByTagName("None"):
value = None
else:
value = field.to_python(getInnerText(field_node).strip())
# Load value since JSONField.to_python() outputs strings.
if field.get_internal_type() == "JSONField":
value = json.loads(value, cls=field.decoder)
data[field.name] = value
obj = base.build_instance(Model, data, self.db)
# Return a DeserializedObject so that the m2m data has a place to live.
return base.DeserializedObject(obj, m2m_data, deferred_fields)
def _handle_fk_field_node(self, node, field):
"""
Handle a <field> node for a ForeignKey
"""
# Check if there is a child node named 'None', returning None if so.
if node.getElementsByTagName("None"):
return None
else:
model = field.remote_field.model
if hasattr(model._default_manager, "get_by_natural_key"):
keys = node.getElementsByTagName("natural")
if keys:
# If there are 'natural' subelements, it must be a natural key
field_value = [getInnerText(k).strip() for k in keys]
try:
obj = model._default_manager.db_manager(
self.db
).get_by_natural_key(*field_value)
except ObjectDoesNotExist:
if self.handle_forward_references:
return base.DEFER_FIELD
else:
raise
obj_pk = getattr(obj, field.remote_field.field_name)
# If this is a natural foreign key to an object that
# has a FK/O2O as the foreign key, use the FK value
if field.remote_field.model._meta.pk.remote_field:
obj_pk = obj_pk.pk
else:
# Otherwise, treat like a normal PK
field_value = getInnerText(node).strip()
obj_pk = model._meta.get_field(
field.remote_field.field_name
).to_python(field_value)
return obj_pk
else:
field_value = getInnerText(node).strip()
return model._meta.get_field(field.remote_field.field_name).to_python(
field_value
)
def _handle_m2m_field_node(self, node, field):
"""
Handle a <field> node for a ManyToManyField.
"""
model = field.remote_field.model
default_manager = model._default_manager
if hasattr(default_manager, "get_by_natural_key"):
def m2m_convert(n):
keys = n.getElementsByTagName("natural")
if keys:
# If there are 'natural' subelements, it must be a natural key
field_value = [getInnerText(k).strip() for k in keys]
obj_pk = (
default_manager.db_manager(self.db)
.get_by_natural_key(*field_value)
.pk
)
else:
# Otherwise, treat like a normal PK value.
obj_pk = model._meta.pk.to_python(n.getAttribute("pk"))
return obj_pk
else:
def m2m_convert(n):
return model._meta.pk.to_python(n.getAttribute("pk"))
values = []
try:
for c in node.getElementsByTagName("object"):
values.append(m2m_convert(c))
except Exception as e:
if isinstance(e, ObjectDoesNotExist) and self.handle_forward_references:
return base.DEFER_FIELD
else:
raise base.M2MDeserializationError(e, c)
else:
return values
def _get_model_from_node(self, node, attr):
"""
Look up a model from a <object model=...> or a <field rel=... to=...>
node.
"""
model_identifier = node.getAttribute(attr)
if not model_identifier:
raise base.DeserializationError(
"<%s> node is missing the required '%s' attribute"
% (node.nodeName, attr)
)
try:
return apps.get_model(model_identifier)
except (LookupError, TypeError):
raise base.DeserializationError(
"<%s> node has invalid model identifier: '%s'"
% (node.nodeName, model_identifier)
)
def getInnerText(node):
"""Get all the inner text of a DOM node (recursively)."""
# inspired by https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
inner_text = []
for child in node.childNodes:
if (
child.nodeType == child.TEXT_NODE
or child.nodeType == child.CDATA_SECTION_NODE
):
inner_text.append(child.data)
elif child.nodeType == child.ELEMENT_NODE:
inner_text.extend(getInnerText(child))
else:
pass
return "".join(inner_text)
# Below code based on Christian Heimes' defusedxml
class DefusedExpatParser(_ExpatParser):
"""
An expat parser hardened against XML bomb attacks.
Forbid DTDs, external entity references
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.setFeature(handler.feature_external_ges, False)
self.setFeature(handler.feature_external_pes, False)
def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
raise DTDForbidden(name, sysid, pubid)
def entity_decl(
self, name, is_parameter_entity, value, base, sysid, pubid, notation_name
):
raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)
def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
# expat 1.2
raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name)
def external_entity_ref_handler(self, context, base, sysid, pubid):
raise ExternalReferenceForbidden(context, base, sysid, pubid)
def reset(self):
_ExpatParser.reset(self)
parser = self._parser
parser.StartDoctypeDeclHandler = self.start_doctype_decl
parser.EntityDeclHandler = self.entity_decl
parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
parser.ExternalEntityRefHandler = self.external_entity_ref_handler
class DefusedXmlException(ValueError):
"""Base exception."""
def __repr__(self):
return str(self)
class DTDForbidden(DefusedXmlException):
"""Document type definition is forbidden."""
def __init__(self, name, sysid, pubid):
super().__init__()
self.name = name
self.sysid = sysid
self.pubid = pubid
def __str__(self):
tpl = "DTDForbidden(name='{}', system_id={!r}, public_id={!r})"
return tpl.format(self.name, self.sysid, self.pubid)
class EntitiesForbidden(DefusedXmlException):
"""Entity definition is forbidden."""
def __init__(self, name, value, base, sysid, pubid, notation_name):
super().__init__()
self.name = name
self.value = value
self.base = base
self.sysid = sysid
self.pubid = pubid
self.notation_name = notation_name
def __str__(self):
tpl = "EntitiesForbidden(name='{}', system_id={!r}, public_id={!r})"
return tpl.format(self.name, self.sysid, self.pubid)
class ExternalReferenceForbidden(DefusedXmlException):
"""Resolving an external reference is forbidden."""
def __init__(self, context, base, sysid, pubid):
super().__init__()
self.context = context
self.base = base
self.sysid = sysid
self.pubid = pubid
def __str__(self):
tpl = "ExternalReferenceForbidden(system_id='{}', public_id={})"
return tpl.format(self.sysid, self.pubid)