Skip to content

Commit 54c3800

Browse files
authored
Merge pull request #115 from ArcanaFramework/vendor-splitout
split out microsoft vendor types to fileformats.vendor namespace
2 parents c0e6059 + ff669e7 commit 54c3800

File tree

21 files changed

+395
-42
lines changed

21 files changed

+395
-42
lines changed

.github/workflows/ci-cd.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
- name: Install Package
4747
run: python3 -m pip install -e .[test]
4848
- name: Install Extras Package
49-
run: python3 -m pip install -e ./extras[test]
49+
run: python3 -m pip install -e ./extras[test,application,image,vendor_openxmlformats_officedocument]
5050
- name: MyPy
5151
run: mypy --install-types --non-interactive --no-warn-unused-ignores .
5252
- name: Pytest
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import typing as ty
2+
3+
from docx import Document
4+
from docx.document import Document as DocumentObject
5+
6+
from fileformats.core import FileSet, extra_implementation
7+
from fileformats.vendor.openxmlformats_officedocument.application import (
8+
Wordprocessingml_Document as MswordX,
9+
)
10+
11+
12+
@extra_implementation(FileSet.load)
13+
def load_docx(doc: MswordX, **kwargs: ty.Any) -> DocumentObject:
    """Open a .docx fileset with python-docx and return the parsed document.

    Parameters
    ----------
    doc : MswordX
        the fileset to read; its ``str()`` form is passed to ``docx.Document``
    **kwargs : Any
        accepted for signature compatibility, not used here

    Returns
    -------
    DocumentObject
        the object produced by ``docx.Document``
    """
    docx_path = str(doc)
    loaded = Document(docx_path)
    return loaded  # type: ignore[no-any-return]
15+
16+
17+
@extra_implementation(FileSet.save)
18+
def save_docx(doc: MswordX, data: DocumentObject, **kwargs: ty.Any) -> None:
    """Write a python-docx document object to the location of a .docx fileset.

    Parameters
    ----------
    doc : MswordX
        the destination fileset; its ``str()`` form is used as the save target
    data : DocumentObject
        the document whose ``save`` method is called
    **kwargs : Any
        accepted for signature compatibility, not used here
    """
    target_path = str(doc)
    data.save(target_path)

extras/pyproject.toml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,7 @@ readme = "README.rst"
99
requires-python = ">=3.11"
1010
dependencies = [
1111
"fileformats",
12-
"imageio >=2.24.0",
1312
"pydra >=1.0a",
14-
"PyYAML>=6.0",
15-
"pydicom >=2.3",
16-
"medimages4tests",
1713
]
1814
license = { file = "LICENSE" }
1915
authors = [{ name = "Thomas G. Close", email = "tom.g.close@gmail.com" }]
@@ -43,6 +39,18 @@ test = [
4339
"codecov",
4440
"medimages4tests",
4541
]
42+
application = [
43+
"PyYAML>=6.0",
44+
"pydicom >=2.3",
45+
"medimages4tests",
46+
]
47+
image = [
48+
"imageio >=2.24.0",
49+
]
50+
vendor_openxmlformats_officedocument = [
51+
"python-docx >= 1.2.0",
52+
]
53+
4654

4755
[project.urls]
4856
repository = "https://github.com/ArcanaFramework/fileformats"

fileformats/application/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from fileformats.core import __version__
22

33
from .archive import Archive, Bzip, Gzip, Tar, TarGzip, Zip
4-
from .document import Document, Msword, MswordX, Pdf, Postscript
4+
from .document import Document, Msword, Pdf, Postscript
55
from .medical import Dicom
66
from .misc import (
77
H224,
@@ -492,6 +492,7 @@
492492
_3gpphalforms__Json,
493493
_3gppIms__Xml,
494494
)
495+
from .presentation import Presentation
495496
from .serialization import (
496497
InformalSchema,
497498
Json,
@@ -503,8 +504,12 @@
503504
XmlSchema,
504505
Yaml,
505506
)
507+
from .spreadsheet import Spreadsheet
506508

507509
from fileformats.text import Javascript # isort: skip
510+
from fileformats.vendor.openxmlformats_officedocument.application import ( # isort:skip
511+
Wordprocessingml_Document as MswordX,
512+
)
508513

509514
__all__ = [
510515
"__version__",
@@ -833,6 +838,7 @@
833838
"Pls__Xml",
834839
"PocSettings__Xml",
835840
"PpspTracker__Json",
841+
"Presentation",
836842
"Problem__Json",
837843
"Problem__Xml",
838844
"Provenance__Xml",
@@ -922,6 +928,7 @@
922928
"Spdx__Json",
923929
"SparqlResults__Xml",
924930
"SpiritsEvent__Xml",
931+
"Spreadsheet",
925932
"Sql",
926933
"Srgs",
927934
"Srgs__Xml",

fileformats/application/document.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from fileformats.core.mixin import WithMagicNumber
22
from fileformats.generic import BinaryFile
33

4-
from .archive import Zip
5-
64

75
# Document formats
86
class Document(BinaryFile):
@@ -25,16 +23,11 @@ class Msword(Document):
2523
iana_mime = "application/msword"
2624

2725

28-
class MswordX(Zip, Document):
29-
ext = ".docx"
30-
31-
iana_mime = (
32-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
33-
)
34-
35-
3626
class Postscript(WithMagicNumber, Document):
    """Postscript document format (``application/postscript``)."""

    # IANA MIME type for this format
    iana_mime = "application/postscript"

    # Byte signature; presumably checked by the WithMagicNumber mixin (confirm)
    magic_number = b"%!"

    # Primary file extension followed by the other accepted extensions
    ext = ".eps"
    alternate_exts = (".ps",)
31+
32+
33+
__all__ = ["Document", "Pdf", "Msword", "Postscript"]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from fileformats.generic import BinaryFile
2+
3+
4+
# Presentation formats
5+
class Presentation(BinaryFile):
    """Binary file format for presentations; declares no attributes of its own."""

    # No iana_mime is declared for this class
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from fileformats.generic import BinaryFile
2+
3+
4+
# Spreadsheet formats
5+
class Spreadsheet(BinaryFile):
    """Binary file format for spreadsheets; declares no attributes of its own."""

    # No iana_mime is declared for this class

fileformats/core/classifier.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,26 @@ def namespace(cls) -> ty.Optional[str]:
2020
module_parts = cls.__module__.split(".")
2121
if module_parts[0] != "fileformats":
2222
raise FormatDefinitionError(
23-
f"Cannot create reversible MIME type for {cls} as it is not in the "
24-
"fileformats namespace"
23+
f"Cannot determine namespace for {cls} format as it is not in the "
24+
"fileformats namespace package"
2525
)
26-
return module_parts[1].replace("_", "-")
26+
namespace = module_parts[1]
27+
if namespace == "vendor":
28+
if len(module_parts) < 4:
29+
raise FormatDefinitionError(
30+
f"Cannot determine namespace for vendor-specific format, {cls} it needs "
31+
"to be in a subpackage of the form `fileformats.vendor.<vendor-name>.<namespace>`,"
32+
f"found `{'.'.join(module_parts)}`"
33+
)
34+
namespace = module_parts[3]
35+
return namespace.replace("_", "-")
36+
37+
@classproperty
38+
def vendor(cls) -> ty.Optional[str]:
    """Return the vendor name encoded in the class's module path, if any.

    The vendor is the third dotted component of ``cls.__module__`` when the
    path has the form ``fileformats.vendor.<vendor-name>...``. Underscores in
    the component are replaced with hyphens.

    Returns
    -------
    str or None
        the hyphenated vendor name, or None when the module path is not under
        ``fileformats.vendor`` or is too short to contain a vendor component
    """
    module_parts = cls.__module__.split(".")
    # Check the length and compare a slice so that short module paths such as
    # "fileformats" or "fileformats.vendor" yield None instead of IndexError
    if len(module_parts) < 3 or module_parts[:2] != ["fileformats", "vendor"]:
        return None
    return module_parts[2].replace("_", "-")
2743

2844
def dummy(self) -> float:
2945

fileformats/core/datatype.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,11 @@ def mime_like(cls) -> str:
117117
return a MIME-like identifier, e.g. "text/plain" for fileformats.text.Plain.
118118
and "medimage/nifti" for fileformats.medimage.Nifti.
119119
"""
120-
return f"{cls.namespace}/{to_mime_format_name(cls.__name__)}" # type: ignore
120+
mime_like: str = cls.namespace + "/"
121+
if cls.vendor:
122+
mime_like += "vnd." + cls.vendor + "."
123+
mime_like += to_mime_format_name(cls.__name__) # type: ignore[attr-defined]
124+
return mime_like
121125

122126
@classmethod
123127
def from_mime(cls, mime_string: str) -> ty.Type[DataType]:
@@ -196,16 +200,25 @@ def from_mime(cls, mime_string: str) -> ty.Type[DataType]:
196200
else:
197201
klass = next(iter(matching_name))
198202
else:
199-
class_name = from_mime_format_name(format_name)
203+
# Get the path to the module to load the class from
204+
if format_name.startswith("vnd."):
205+
name_parts = format_name.split(".")
206+
vendor = name_parts[1]
207+
format_name = ".".join(name_parts[2:])
208+
module_path = f"fileformats.vendor.{vendor}.{namespace}"
209+
else:
210+
module_path = f"fileformats.{namespace}"
211+
module_path = module_path.replace("-", "_")
200212
try:
201-
module = importlib.import_module("fileformats." + namespace)
213+
module = importlib.import_module(module_path)
202214
except ImportError:
203215
raise FormatRecognitionError(
204-
f"Did not find fileformats namespace package corresponding to {namespace} "
216+
f"Did not find fileformats namespace package at '{module_path}' "
205217
f"required to interpret '{mime_string}' MIME, or MIME-like, type. "
206218
f"try installing the namespace package with "
207219
f"'python3 -m pip install fileformats-{namespace}'."
208220
) from None
221+
class_name = from_mime_format_name(format_name)
209222
try:
210223
klass = getattr(module, class_name)
211224
except AttributeError:

fileformats/core/tests/test_classifiers.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,31 @@
11
from __future__ import annotations
2+
23
import decimal
4+
35
import pytest
46
from pydra.compose import python
5-
from fileformats.core import from_mime, DataType, FileSet
6-
from fileformats.core import converter
7+
78
from fileformats.application import Zip
8-
from fileformats.generic import DirectoryOf
9-
from fileformats.field import Array, Integer, Decimal, Text, Boolean
9+
from fileformats.core import DataType, FileSet, converter, from_mime
1010
from fileformats.core.exceptions import (
11-
FormatDefinitionError,
1211
FormatConversionError,
13-
FormatRecognitionError,
12+
FormatDefinitionError,
1413
FormatMismatchError,
14+
FormatRecognitionError,
1515
)
16+
from fileformats.field import Array, Boolean, Decimal, Integer, Text
17+
from fileformats.generic import DirectoryOf
18+
from fileformats.testing import J # Y,
1619
from fileformats.testing import (
1720
A,
1821
B,
1922
C,
23+
Classified,
2024
D,
2125
E,
2226
F,
2327
G,
2428
H,
25-
J,
2629
K,
2730
L,
2831
M,
@@ -31,16 +34,13 @@
3134
Q,
3235
R,
3336
TestField,
34-
Classified,
3537
U,
3638
V,
3739
W,
3840
X,
39-
# Y,
4041
Z,
4142
)
4243

43-
4444
SpecificDataType = DataType.type_var("SpecificDataType")
4545
SpecificFileSet = FileSet.type_var("SpecificFileSet")
4646

@@ -242,9 +242,28 @@ def test_mime_roundtrips():
242242
assert from_mime("testing/b.a+k") is K[B, A]
243243
assert from_mime("testing/b.a+k") is not K[A, B]
244244

245-
with pytest.raises(FormatRecognitionError) as e:
245+
with pytest.raises(
246+
FormatRecognitionError, match="Cannot create reversible MIME type"
247+
):
246248
Array[TestField].mime_like
247-
assert "Cannot create reversible MIME type for " in str(e)
249+
250+
251+
def test_mime_fail():
    """``namespace`` on a locally defined format must raise FormatDefinitionError.

    NOTE(review): this presumably relies on the test module's ``__module__`` not
    starting with ``fileformats`` -- confirm against the pytest import mode.
    """

    class BadFormat(DataType):
        """Bare DataType subclass declared inside the test."""

    expected_failure = pytest.raises(
        FormatDefinitionError, match="Cannot determine namespace"
    )
    with expected_failure:
        BadFormat.namespace
257+
258+
259+
def test_vendor_mime_fail():
    """``namespace`` must raise for a vendor module path lacking a namespace part.

    The class is relocated to ``fileformats.vendor.badnamespace``, which has
    only three dotted components, and ``namespace`` is then accessed.
    """

    class BadVendorFormat(DataType):
        """Bare DataType subclass whose ``__module__`` is overridden below."""

    # Set after class creation so that only the later lookup sees the fake path
    BadVendorFormat.__module__ = "fileformats.vendor.badnamespace"

    expected_failure = pytest.raises(
        FormatDefinitionError, match="Cannot determine namespace"
    )
    with expected_failure:
        BadVendorFormat.namespace
248267

249268

250269
def test_inherited_classifiers():
@@ -430,3 +449,4 @@ def test_classifier_categories6():
430449
match="Cannot have more than one occurrence of a classifier ",
431450
):
432451
Classified[C, E]
452+
Classified[C, E]

0 commit comments

Comments
 (0)