
Commit baf445a

Merge pull request #1051 from guzman-raphael/json

Add `json` data type

2 parents f28a3b9 + 477d270, commit baf445a

14 files changed: +1411 -63 lines

CHANGELOG.md (+2 -1)

```diff
@@ -1,6 +1,7 @@
 ## Release notes
 
-### 0.14.0 -- TBA
+### 0.14.0 -- Feb 10, 2023
+- Added - `json` data type ([#245](https://github.com/datajoint/datajoint-python/issues/245)) PR [#1051](https://github.com/datajoint/datajoint-python/pull/1051)
 - Fixed - Activating a schema requires all tables to exist even if `create_tables=False` PR [#1058](https://github.com/datajoint/datajoint-python/pull/1058)
 - Changed - Populate call with `reserve_jobs=True` to exclude `error` and `ignore` keys - PR [#1062](https://github.com/datajoint/datajoint-python/pull/1062)
 - Added - Support for inserting data with CSV files - PR [#1067](https://github.com/datajoint/datajoint-python/pull/1067)
```

datajoint/condition.py (+71 -32)

```diff
@@ -8,8 +8,29 @@
 import decimal
 import numpy
 import pandas
+import json
 from .errors import DataJointError
 
+JSON_PATTERN = re.compile(
+    r"^(?P<attr>\w+)(\.(?P<path>[\w.*\[\]]+))?(:(?P<type>[\w(,\s)]+))?$"
+)
+
+
+def translate_attribute(key):
+    match = JSON_PATTERN.match(key)
+    if match is None:
+        return match, key
+    match = match.groupdict()
+    if match["path"] is None:
+        return match, match["attr"]
+    else:
+        return match, "json_value(`{}`, _utf8mb4'$.{}'{})".format(
+            *[
+                ((f" returning {v}" if k == "type" else v) if v else "")
+                for k, v in match.items()
+            ]
+        )
+
 
 class PromiscuousOperand:
     """
@@ -94,35 +115,56 @@ def make_condition(query_expression, condition, columns):
     from .expression import QueryExpression, Aggregation, U
 
     def prep_value(k, v):
-        """prepare value v for inclusion as a string in an SQL condition"""
-        if query_expression.heading[k].uuid:
+        """prepare SQL condition"""
+        key_match, k = translate_attribute(k)
+        if key_match["path"] is None:
+            k = f"`{k}`"
+        if (
+            query_expression.heading[key_match["attr"]].json
+            and key_match["path"] is not None
+            and isinstance(v, dict)
+        ):
+            return f"{k}='{json.dumps(v)}'"
+        if v is None:
+            return f"{k} IS NULL"
+        if query_expression.heading[key_match["attr"]].uuid:
             if not isinstance(v, uuid.UUID):
                 try:
                     v = uuid.UUID(v)
                 except (AttributeError, ValueError):
                     raise DataJointError(
                         "Badly formed UUID {v} in restriction by `{k}`".format(k=k, v=v)
                     )
-            return "X'%s'" % v.bytes.hex()
+            return f"{k}=X'{v.bytes.hex()}'"
         if isinstance(
-            v, (datetime.date, datetime.datetime, datetime.time, decimal.Decimal)
+            v,
+            (
+                datetime.date,
+                datetime.datetime,
+                datetime.time,
+                decimal.Decimal,
+                list,
+            ),
         ):
-            return '"%s"' % v
+            return f'{k}="{v}"'
         if isinstance(v, str):
-            return '"%s"' % v.replace("%", "%%").replace("\\", "\\\\")
-        return "%r" % v
+            v = v.replace("%", "%%").replace("\\", "\\\\")
+            return f'{k}="{v}"'
+        return f"{k}={v}"
+
+    def combine_conditions(negate, conditions):
+        return f"{'NOT ' if negate else ''} ({')AND('.join(conditions)})"
 
     negate = False
     while isinstance(condition, Not):
         negate = not negate
         condition = condition.restriction
-    template = "NOT (%s)" if negate else "%s"
 
     # restrict by string
     if isinstance(condition, str):
         columns.update(extract_column_names(condition))
-        return template % condition.strip().replace(
-            "%", "%%"
+        return combine_conditions(
+            negate, conditions=[condition.strip().replace("%", "%%")]
         )  # escape %, see issue #376
 
@@ -139,7 +181,7 @@ def prep_value(k, v):
             return negate  # if any item is False, the whole thing is False
         if not items:
             return not negate  # and empty AndList is True
-        return template % ("(" + ") AND (".join(items) + ")")
+        return combine_conditions(negate, conditions=items)
 
     # restriction by dj.U evaluates to True
     if isinstance(condition, U):
@@ -151,23 +193,19 @@ def prep_value(k, v):
 
     # restrict by a mapping/dict -- convert to an AndList of string equality conditions
     if isinstance(condition, collections.abc.Mapping):
-        common_attributes = set(condition).intersection(query_expression.heading.names)
+        common_attributes = set(c.split(".", 1)[0] for c in condition).intersection(
+            query_expression.heading.names
+        )
         if not common_attributes:
             return not negate  # no matching attributes -> evaluates to True
         columns.update(common_attributes)
-        return template % (
-            "("
-            + ") AND (".join(
-                "`%s`%s"
-                % (
-                    k,
-                    " IS NULL"
-                    if condition[k] is None
-                    else f"={prep_value(k, condition[k])}",
-                )
-                for k in common_attributes
-            )
-            + ")"
+        return combine_conditions(
+            negate,
+            conditions=[
+                prep_value(k, v)
+                for k, v in condition.items()
+                if k.split(".", 1)[0] in common_attributes  # handle json indexing
+            ],
         )
 
     # restrict by a numpy record -- convert to an AndList of string equality conditions
@@ -178,12 +216,9 @@ def prep_value(k, v):
         if not common_attributes:
             return not negate  # no matching attributes -> evaluate to True
         columns.update(common_attributes)
-        return template % (
-            "("
-            + ") AND (".join(
-                "`%s`=%s" % (k, prep_value(k, condition[k])) for k in common_attributes
-            )
-            + ")"
+        return combine_conditions(
+            negate,
+            conditions=[prep_value(k, condition[k]) for k in common_attributes],
         )
 
     # restrict by a QueryExpression subclass -- trigger instantiation and move on
@@ -231,7 +266,11 @@ def prep_value(k, v):
         ]  # ignore False conditions
         if any(item is True for item in or_list):  # if any item is True, entirely True
             return not negate
-        return template % ("(%s)" % " OR ".join(or_list)) if or_list else negate
+        return (
+            f"{'NOT ' if negate else ''} ({' OR '.join(or_list)})"
+            if or_list
+            else negate
+        )
 
 
 def extract_column_names(sql_expression):
```
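Taken together, these changes let restrictions reach into `json` attributes. A rough sketch of what the new `translate_attribute` helper returns (the `subject_id` and `payload` names are made up for illustration):

```python
from datajoint.condition import translate_attribute

# A bare attribute name passes through unchanged; prep_value() adds backticks.
_, sql = translate_attribute("subject_id")
print(sql)  # subject_id

# A dotted key is rewritten to a MySQL JSON_VALUE() expression.
_, sql = translate_attribute("payload.name")
print(sql)  # json_value(`payload`, _utf8mb4'$.name')

# An optional ":type" suffix adds a RETURNING clause so comparisons are typed.
_, sql = translate_attribute("payload.price:decimal(10, 2)")
print(sql)  # json_value(`payload`, _utf8mb4'$.price' returning decimal(10, 2))
```

So a dict restriction such as `{"payload.name": "Alice"}` now compiles, via `prep_value`, to an equality condition on the `json_value(...)` expression instead of being ignored as a non-matching attribute name.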

datajoint/declare.py (+18 -20)

```diff
@@ -7,6 +7,7 @@
 import logging
 from .errors import DataJointError, _support_filepath_types, FILEPATH_FEATURE_SWITCH
 from .attribute_adapter import get_adapter
+from .condition import translate_attribute
 
 UUID_DATA_TYPE = "binary(16)"
 MAX_TABLE_NAME_LENGTH = 64
@@ -23,6 +24,7 @@
     DECIMAL=r"(decimal|numeric)(\s*\(.+\))?(\s+unsigned)?$",
     FLOAT=r"(double|float|real)(\s*\(.+\))?(\s+unsigned)?$",
     STRING=r"(var)?char\s*\(.+\)$",
+    JSON=r"json$",
     ENUM=r"enum\s*\(.+\)$",
     BOOL=r"bool(ean)?$",  # aliased to tinyint(1)
     TEMPORAL=r"(date|datetime|time|timestamp|year)(\s*\(.+\))?$",
@@ -129,25 +131,9 @@ def build_attribute_parser():
     return attribute_name + pp.Optional(default) + colon + data_type + comment
 
 
-def build_index_parser():
-    left = pp.Literal("(").suppress()
-    right = pp.Literal(")").suppress()
-    unique = pp.Optional(pp.CaselessKeyword("unique")).setResultsName("unique")
-    index = pp.CaselessKeyword("index").suppress()
-    attribute_name = pp.Word(pp.srange("[a-z]"), pp.srange("[a-z0-9_]"))
-    return (
-        unique
-        + index
-        + left
-        + pp.delimitedList(attribute_name).setResultsName("attr_list")
-        + right
-    )
-
-
 foreign_key_parser_old = build_foreign_key_parser_old()
 foreign_key_parser = build_foreign_key_parser()
 attribute_parser = build_attribute_parser()
-index_parser = build_index_parser()
 
 
 def is_foreign_key(line):
@@ -275,7 +261,7 @@ def prepare_declare(definition, context):
                 foreign_key_sql,
                 index_sql,
             )
-        elif re.match(r"^(unique\s+)?index[^:]*$", line, re.I):  # index
+        elif re.match(r"^(unique\s+)?index\s*.*$", line, re.I):  # index
             compile_index(line, index_sql)
         else:
             name, sql, store = compile_attribute(line, in_key, foreign_key_sql, context)
@@ -449,10 +435,22 @@ def alter(definition, old_definition, context):
 
 
 def compile_index(line, index_sql):
-    match = index_parser.parseString(line)
+    def format_attribute(attr):
+        match, attr = translate_attribute(attr)
+        if match is None:
+            return attr
+        if match["path"] is None:
+            return f"`{attr}`"
+        return f"({attr})"
+
+    match = re.match(
+        r"(?P<unique>unique\s+)?index\s*\(\s*(?P<args>.*)\)", line, re.I
+    ).groupdict()
+    attr_list = re.findall(r"(?:[^,(]|\([^)]*\))+", match["args"])
     index_sql.append(
-        "{unique} index ({attrs})".format(
-            unique=match.unique, attrs=",".join("`%s`" % a for a in match.attr_list)
+        "{unique}index ({attrs})".format(
+            unique="unique " if match["unique"] else "",
+            attrs=",".join(format_attribute(a.strip()) for a in attr_list),
         )
     )
```
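With the new `JSON` type pattern and the regex-based `compile_index`, a definition can declare a `json` attribute and index a path into it. A sketch of a definition that should now be accepted (the schema and table names here are hypothetical):

```python
import datajoint as dj

schema = dj.Schema("tutorial")  # hypothetical schema name


@schema
class Team(dj.Lookup):
    definition = """
    name: varchar(40)
    ---
    car=null: json  # a nullable JSON attribute
    unique index(car.name)
    index(car.length:decimal(4, 1))
    """
```

Note the paren-aware split in `compile_index` (`re.findall(r"(?:[^,(]|\([^)]*\))+", ...)`): the comma inside `decimal(4, 1)` no longer breaks the attribute list, which the removed pyparsing-based `index_parser` could not have handled.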

datajoint/expression.py (+4)

```diff
@@ -14,6 +14,7 @@
     assert_join_compatibility,
     extract_column_names,
     PromiscuousOperand,
+    translate_attribute,
 )
 from .declare import CONSTANT_LITERALS
 
@@ -342,6 +343,9 @@ def proj(self, *attributes, **named_attributes):
         from other attributes available before the projection.
         Each attribute name can only be used once.
         """
+        named_attributes = {
+            k: translate_attribute(v)[1] for k, v in named_attributes.items()
+        }
         # new attributes in parentheses are included again with the new name without removing original
         duplication_pattern = re.compile(
             rf'^\s*\(\s*(?!{"|".join(CONSTANT_LITERALS)})(?P<name>[a-zA-Z_]\w*)\s*\)\s*$'
```
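Since `proj` now passes named attributes through `translate_attribute`, a JSON path can be projected out as an ordinary column. Continuing the hypothetical `Team` table from the declaration sketch:

```python
# Each named attribute may be a plain name, a JSON path, or a typed JSON path.
q = Team.proj(car_name="car.name", car_length="car.length:decimal(4, 1)")
```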

datajoint/fetch.py (+3 -1)

```diff
@@ -4,6 +4,7 @@
 import pandas
 import itertools
 import re
+import json
 import numpy as np
 import uuid
 import numbers
@@ -47,6 +48,8 @@ def _get(connection, attr, data, squeeze, download_path):
     """
     if data is None:
         return
+    if attr.json:
+        return json.loads(data)
 
     extern = (
         connection.schemas[attr.database].external[attr.store]
@@ -59,7 +62,6 @@ def _get(connection, attr, data, squeeze, download_path):
 
     if attr.is_filepath:
         return adapt(extern.download_filepath(uuid.UUID(bytes=data))[0])
-
     if attr.is_attachment:
         # Steps:
         # 1. get the attachment filename
```
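On fetch, values of `json` attributes are decoded with `json.loads`, so the client sees native Python objects rather than JSON strings. Again with the hypothetical `Team` table:

```python
row = (Team & {"name": "engineering"}).fetch1()
car = row["car"]  # already a dict (or None), not a JSON string
```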

datajoint/heading.py (+12 -4)

```diff
@@ -28,6 +28,7 @@
     numeric=None,
     string=None,
     uuid=False,
+    json=None,
     is_blob=False,
     is_attachment=False,
     is_filepath=False,
@@ -142,7 +143,7 @@ def non_blobs(self):
         return [
             k
             for k, v in self.attributes.items()
-            if not v.is_blob and not v.is_attachment and not v.is_filepath
+            if not (v.is_blob or v.is_attachment or v.is_filepath or v.json)
         ]
 
     @property
@@ -290,6 +291,7 @@ def _init_from_database(self):
                 ),
                 is_blob=bool(TYPE_PATTERN["INTERNAL_BLOB"].match(attr["type"])),
                 uuid=False,
+                json=bool(TYPE_PATTERN["JSON"].match(attr["type"])),
                 is_attachment=False,
                 is_filepath=False,
                 adapter=None,
@@ -375,10 +377,15 @@ def _init_from_database(self):
             )
 
             if attr["in_key"] and any(
-                (attr["is_blob"], attr["is_attachment"], attr["is_filepath"])
+                (
+                    attr["is_blob"],
+                    attr["is_attachment"],
+                    attr["is_filepath"],
+                    attr["json"],
+                )
             ):
                 raise DataJointError(
-                    "Blob, attachment, or filepath attributes are not allowed in the primary key"
+                    "Json, Blob, attachment, or filepath attributes are not allowed in the primary key"
                 )
 
             if (
@@ -419,7 +426,8 @@ def _init_from_database(self):
         ):
             if item["Key_name"] != "PRIMARY":
                 keys[item["Key_name"]][item["Seq_in_index"]] = dict(
-                    column=item["Column_name"],
+                    column=item["Column_name"]
+                    or f"({item['Expression']})".replace(r"\'", "'"),
                     unique=(item["Non_unique"] == 0),
                     nullable=item["Null"].lower() == "yes",
                 )
```
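One behavioral consequence of the heading changes: a `json` attribute is now rejected from the primary key, just like blobs, attachments, and filepaths. A sketch of a definition that should fail (same hypothetical schema as above):

```python
# Expected to raise DataJointError:
# "Json, Blob, attachment, or filepath attributes are not allowed in the primary key"
@schema
class Bad(dj.Manual):
    definition = """
    meta: json
    ---
    score: float
    """
```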

datajoint/table.py (+3)

```diff
@@ -8,6 +8,7 @@
 import uuid
 import csv
 import re
+import json
 from pathlib import Path
 from .settings import config
 from .declare import declare, alter
@@ -831,6 +832,8 @@ def __make_placeholder(self, name, value, ignore_extra_fields=False):
             value = self.external[attr.store].upload_filepath(value).bytes
         elif attr.numeric:
             value = str(int(value) if isinstance(value, bool) else value)
+        elif attr.json:
+            value = json.dumps(value)
         return name, placeholder, value
 
     def __make_row_to_insert(self, row, field_list, ignore_extra_fields):
```
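And on insert, `__make_placeholder` runs values bound to `json` attributes through `json.dumps`, so plain Python structures round-trip without manual serialization. A usage sketch against the hypothetical `Team` table:

```python
Team.insert1(
    {
        "name": "engineering",
        # dicts and lists are serialized to JSON automatically
        "car": {"name": "Rever", "length": 20.5, "inspected": True},
    }
)
```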

docs/.docker/pip_requirements.txt (+1)

```diff
@@ -7,3 +7,4 @@ mdx-truly-sane-lists
 mkdocs-gen-files
 mkdocs-literate-nav
 mkdocs-exclude-search
+mkdocs-jupyter
```

docs/mkdocs.yaml (+4 -1)

```diff
@@ -15,7 +15,8 @@ nav:
   - Reproducibility:
       - Table Tiers: reproduce/table-tiers.md
       - Make Method: reproduce/make-method.md
-  - Tutorials: tutorials.md
+  - Tutorials:
+      - tutorials/json.ipynb
   - Develop: develop.md
   - Changelog: about/changelog.md
   - API: api/ # defer to gen-files + literate-nav
@@ -72,6 +73,8 @@ plugins:
       exclude:
         - "*/navigation.md"
         - "*/archive/*md"
+  - mkdocs-jupyter:
+      include: ["*.ipynb"]
 markdown_extensions:
   - attr_list
   - toc:
```

docs/src/concepts.md

Whitespace-only changes.

docs/src/tutorials.md (-3)

This file was deleted.
