Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,66 @@ of create and modify timestamps for every AVU returned from the server:
datetime.datetime(2022, 9, 19, 15, 26, 7)
```

Disabling AVU reloads from the iRODS server
-------------------------------------------

With the default setting of `reload = True`, an `iRODSMetaCollection` will
proactively read all current AVUs back from the iRODS server after any
metadata write done by the client. This helps methods such as `items()`
return an up-to-date result. Setting `reload = False` can, however, greatly
increase code efficiency if, for example, many AVUs must be added or deleted
at once without reading any of them back.

```py
# Make a metadata view in which AVUs are not reloaded, for quick update:
non_current_metadata_view = obj.metadata(reload = False)
for i in range(10):
non_current_metadata_view.add("my_key", "my_value_"+str(i))

# Force reload of AVUs and display:
current_metadata = obj.metadata().items()
print(f"{current_metadata = }")
```

Subclassing `iRODSMeta`
-----------------------
The keyword option `iRODSMeta_type` can be used to set up any `iRODSMeta`
subclass as the translator between native iRODS metadata APIs
and the way in which the AVUs thus conveyed should be represented to the
client.

An example is the `irods.meta.iRODSBinOrStringMeta` class which uses the
`base64` module to "hide" arbitrary bytestrings within the `value` and
`units` attributes of an iRODS metadata AVU:

```py
from irods.meta import iRODSBinOrStringMeta as MyMeta
d = session.data_objects.get('/path/to/object')
unencodable_octets = '\u1000'.encode('utf8')[:-1]

# Use our custom client-metadata type to store arbitrary octet strings.
meta_view = d.metadata(iRODSMeta_type = MyMeta)
meta_view.set(m1 := MyMeta('mybinary', unencodable_octets, b'\x02'))

# Show that traditional AVUs can exist alongside the custom kind.
irods.client_configuration.connections.xml_parser_default = 'QUASI_XML'
meta_view.set(m2 := MyMeta('mytext', '\1', '\2'))

try:
# These two lines are equivalent.
assert {m1,m2} <= (all_avus := set(meta_view.items()))
assert {tuple(m1),tuple(m2)} <= all_avus
finally:
del meta_view['mytext'], meta_view['mybinary']
```

Whereas the content of native iRODS AVUs must obey some valid text encoding as
determined by the resident iRODS catalog, the above is a possible alternative - albeit
one semantically bound to the local application that defines the needed
translations. Still, this can be a valid usage for users who need a guarantee
that any given octet string they might generate can be placed into metadata without
violating standard text encodings.

Atomic operations on metadata
-----------------------------

Expand Down
54 changes: 36 additions & 18 deletions irods/manager/metadata_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,45 @@


class MetadataManager(Manager):

def __init__(self, *_):

Check failure on line 32 in irods/manager/metadata_manager.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-check

Ruff D107

D107: Missing docstring in `__init__` [pydocstyle:undocumented-public-init]
self._opts = {
'admin':False,
'timestamps':False,
'iRODSMeta_type':iRODSMeta
}

Check failure on line 37 in irods/manager/metadata_manager.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-format

Ruff format

Improper formatting
super().__init__(*_)

@property
def use_timestamps(self):
return getattr(self, "_use_ts", False)
return self._opts['timestamps']

__kw : Dict[str, Any] = {} # default (empty) keywords


def _updated_keywords(self, opts):
kw_ = self.__kw.copy()
kw_.update(opts)
return kw_

def __call__(self, admin=False, timestamps=False, **irods_kw_opt):
if admin:
irods_kw_opt.update([(kw.ADMIN_KW, "")])
def get_api_keywords(self):
    """Return a copy of the API keywords currently stored on this manager."""
    return dict(self.__kw)

Check failure on line 52 in irods/manager/metadata_manager.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-format

Ruff format

Improper formatting

Check failure on line 52 in irods/manager/metadata_manager.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-check

Ruff D102

D102: Missing docstring in public method [pydocstyle:undocumented-public-method]

def __call__(self, **flags):
    """Return a shallow copy of this manager with its options updated from ``flags``.

    Args:
        **flags: option overrides, e.g. ``admin``, ``timestamps`` or
            ``iRODSMeta_type``.  A flag whose value is ``None`` leaves the
            current setting untouched.

    Returns:
        A new manager object.  The manager it was derived from keeps its
        own options and API keywords.
    """
    # Make a new shallow copy of the manager object, but update options from parameter list.
    new_self = copy.copy(self)
    new_self._opts = copy.copy(self._opts)

    # Update the flags that do bookkeeping in the returned (new) manager object.
    new_self._opts.update(
        (key, val) for key, val in flags.items() if val is not None
    )

    # Give the returned manager its own keyword dictionary.  Mutating
    # self.__kw in place would alter the dictionary shared through the class
    # attribute, toggling ADMIN_KW for every other manager as well.
    api_keywords = self.__kw.copy()
    if new_self._opts.get('admin'):
        api_keywords[kw.ADMIN_KW] = ""
    else:
        api_keywords.pop(kw.ADMIN_KW, None)
    new_self.__kw = api_keywords

    return new_self

@staticmethod
Expand All @@ -67,6 +88,9 @@
}[model_cls]

def get(self, model_cls, path):
if not path:
# Short circuit. This should be of the same type as the object returned at the function's end.
return []
resource_type = self._model_class_to_resource_type(model_cls)
model = {
"d": DataObjectMeta,
Expand Down Expand Up @@ -96,9 +120,9 @@
return opts

return [
iRODSMeta(
row[model.name], row[model.value], row[model.units], **meta_opts(row)
)
self._opts['iRODSMeta_type'](None,None,None)._from_column_triple(

Check failure on line 123 in irods/manager/metadata_manager.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-check

Ruff SLF001

SLF001: Private member accessed: `_from_column_triple` [flake8-self:private-member-access]
row[model.name], row[model.value], row[model.units],
**meta_opts(row))

Check failure on line 125 in irods/manager/metadata_manager.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-format

Ruff format

Improper formatting
for row in results
]

Expand All @@ -109,9 +133,7 @@
"add",
"-" + resource_type,
path,
meta.name,
meta.value,
meta.units,
*meta._to_column_triple(),

Check failure on line 136 in irods/manager/metadata_manager.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-check

Ruff SLF001

SLF001: Private member accessed: `_to_column_triple` [flake8-self:private-member-access]
**self._updated_keywords(opts)
)
request = iRODSMessage(
Expand All @@ -128,9 +150,7 @@
"rm",
"-" + resource_type,
path,
meta.name,
meta.value,
meta.units,
*meta._to_column_triple(),

Check failure on line 153 in irods/manager/metadata_manager.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-check

Ruff SLF001

SLF001: Private member accessed: `_to_column_triple` [flake8-self:private-member-access]
**self._updated_keywords(opts)
)
request = iRODSMessage(
Expand Down Expand Up @@ -167,9 +187,7 @@
"set",
"-" + resource_type,
path,
meta.name,
meta.value,
meta.units,
*meta._to_column_triple(),

Check failure on line 190 in irods/manager/metadata_manager.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-check

Ruff SLF001

SLF001: Private member accessed: `_to_column_triple` [flake8-self:private-member-access]
**self._updated_keywords(opts)
)
request = iRODSMessage(
Expand Down
74 changes: 61 additions & 13 deletions irods/meta.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,41 @@
import base64
import copy


class iRODSMeta:

def _to_column_triple(self):
return (self.name ,self.forward_translate(self.value)) + (('',) if not self.units else (self.forward_translate(self.units),))

def _from_column_triple(self, name, value, units, **kw):
self.__low_level_init(name,
self.reverse_translate(value),
units=None if not units else self.reverse_translate(units),
**kw)
return self

reverse_translate = forward_translate = staticmethod(lambda _:_)

Check failure on line 17 in irods/meta.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-format

Ruff format

Improper formatting

INIT_KW_ARGS = 'units avu_id create_time modify_time'.split()

def __init__(
self, name, value, units=None, avu_id=None, create_time=None, modify_time=None
self, name, value, /, units=None, *, avu_id=None, create_time=None, modify_time=None,
):
self.avu_id = avu_id
# Defer initialization for iRODSMeta(attribute,value,...) if neither attribute nor value is True under
# a 'bool' transformation. In so doing we streamline initialization for iRODSMeta (and any subclasses)
# for alternatively populating via _from_column_triple(...).
# This is the pathway for allowing user-defined encodings of the iRODSMeta (byte-)string AVU components.
if name or value:
# Note: calling locals() inside the dict comprehension would not access variables in this frame.
local_vars = locals()
kw = {name:local_vars.get(name) for name in self.INIT_KW_ARGS}

Check failure on line 31 in irods/meta.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-format

Ruff format

Improper formatting
self.__low_level_init(name, value, **kw)

def __low_level_init(self, name, value, **kw):
self.name = name
self.value = value
self.units = units
self.create_time = create_time
self.modify_time = modify_time
for attr in self.INIT_KW_ARGS:
setattr(self, attr, kw.get(attr))

def __eq__(self, other):
return tuple(self) == tuple(other)
Expand All @@ -20,7 +47,22 @@
yield self.units

def __repr__(self):
return "<iRODSMeta {avu_id} {name} {value} {units}>".format(**vars(self))
return f"<{self.__class__.__name__} {self.avu_id} {self.name} {self.value} {self.units}>"

def __hash__(self):
return hash(tuple(self))

class iRODSBinOrStringMeta(iRODSMeta):

Check failure on line 56 in irods/meta.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-format

Ruff format

Improper formatting
@staticmethod
def reverse_translate(value):
"""Translate an AVU field from its iRODS object-database form into the client representation of that field."""
return value if value[0] != '\\' else base64.decodebytes(value[1:].encode('utf8'))

@staticmethod
def forward_translate(value):
"""Translate an AVU field from the form it takes in the client, into an iRODS object-database compatible form."""
return b'\\' + base64.encodebytes(value).strip() if isinstance(value,(bytes,bytearray)) else value


class BadAVUOperationKeyword(Exception):
Expand Down Expand Up @@ -84,14 +126,16 @@
setattr(self, atr, locals()[atr])


import copy


class iRODSMetaCollection:

def __call__(self, admin=False, timestamps=False, **opts):
def __call__(self, **opts):

Check failure on line 131 in irods/meta.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-format

Ruff format

Improper formatting
"""Optional parameters in **opts are:
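admin (default: False): apply ADMIN_KW to future metadata operations.
timestamps (default: False): attach (ctime,mtime) timestamp attributes to AVUs received from iRODS.
reload (default: True): read the AVUs back from the iRODS server after each metadata update.
iRODSMeta_type (default: iRODSMeta): the iRODSMeta subclass used to represent AVUs to the client.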
"""
x = copy.copy(self)
x._manager = (x._manager)(admin, timestamps, **opts)
x._manager = (x._manager)(**opts)
x._reset_metadata()
return x

Expand All @@ -102,7 +146,11 @@
self._reset_metadata()

def _reset_metadata(self):
self._meta = self._manager.get(self._model_cls, self._path)
m = self._manager
if not hasattr(self,"_meta"):
self._meta = m.get(None, "")
if m._opts.setdefault('reload',True):

Check failure on line 152 in irods/meta.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-format

Ruff format

Improper formatting
self._meta = m.get(self._model_cls, self._path)

def get_all(self, key):
"""
Expand All @@ -129,7 +177,7 @@
def _get_meta(self, *args):
if not len(args):
raise ValueError("Must specify an iRODSMeta object or key, value, units)")
return args[0] if len(args) == 1 else iRODSMeta(*args)
return args[0] if len(args) == 1 else self._manager._opts['iRODSMeta_type'](*args)

def apply_atomic_operations(self, *avu_ops):
self._manager.apply_atomic_operations(self._model_cls, self._path, *avu_ops)
Expand Down
62 changes: 62 additions & 0 deletions irods/test/meta_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
AVUOperation,
BadAVUOperationValue,
BadAVUOperationKeyword,
iRODSBinOrStringMeta,
)
from irods.models import DataObject, Collection, Resource, CollectionMeta
import irods.test.helpers as helpers
Expand Down Expand Up @@ -798,6 +799,67 @@
# in use, with the "odd" characters being present in the metadata value.
del obj.metadata[attr_str]

def test_binary_avu_fields__issue_707(self):
meta_coll = self.obj.metadata(iRODSMeta_type=iRODSBinOrStringMeta)
illegal_unicode_sequence = '\u1000'.encode('utf8')[:2]
avu_name = 'issue709'
meta_coll.set(
avu_name,
(value:=b'value_'+illegal_unicode_sequence),
(units:=b'units_'+illegal_unicode_sequence)
)

self.assertEqual(
meta_coll.get_one(avu_name),
(avu_name, value, units)
)
meta_coll.add(*(new_avu:=iRODSMeta(avu_name, '\u1000', '\u1001')))
relevant_avus = meta_coll.get_all(avu_name)
self.assertIn(new_avu, relevant_avus)

def test_cascading_changes_of_metadata_manager_options__issue_709(self):
d = None
get_option = lambda metacoll, key: metacoll._manager._opts[key]
try:
d = self.sess.data_objects.create(f'{self.coll.path}/issue_709_test_1')
m = d.metadata
self.assertEqual(get_option(m,'admin'),False)

m2 = m(admin = True)
self.assertEqual(get_option(m2,'timestamps'),False)
self.assertEqual(get_option(m2,'admin'),True)

m3 = m2(timestamps = True)
self.assertEqual(get_option(m3,'timestamps'), True)
self.assertEqual(get_option(m3,'admin'), True)
self.assertEqual(m3._manager.get_api_keywords().get(kw.ADMIN_KW), "")

m4 = m3(admin = False)
self.assertEqual(get_option(m4,'admin'), False)
self.assertEqual(m4._manager.get_api_keywords().get(kw.ADMIN_KW), None)
finally:
if d:
d.unlink(force=True)

def test_reload_can_be_deactivated__issue_768(self):
# Set an initial AVU
metacoll = self.obj.metadata
metacoll.set(item_1:=iRODSMeta('aa','bb','cc'))

# Initial defaults will always reload the AVU list from the server, so new AVU should be seen.
self.assertIn(item_1, metacoll.items())

# Setting reload option to False will prevent reload of object AVUs, so an AVU just set should not be seen.
metacoll_2 = metacoll(reload=False)
metacoll_2.set(item_2:=iRODSMeta('xx','yy','zz'))

Check failure on line 854 in irods/test/meta_test.py

View workflow job for this annotation

GitHub Actions / ruff-lint / ruff-format

Ruff format

Improper formatting
items = metacoll_2.items()
self.assertIn(item_1, items)
self.assertNotIn(item_2, items)

# Restore old setting. Check that both AVUs are seen as present.
items_reloaded = metacoll_2(reload=True).items()
self.assertIn(item_1, items_reloaded)
self.assertIn(item_2, items_reloaded)

if __name__ == "__main__":
# let the tests find the parent irods lib
Expand Down