Skip to content

Commit 03e2fc4

Browse files
committed
(issue 724) :
- moved LHDFStore to inout/hdf.py - implemented PandasStorer and PytablesStorer - updated LArray/Axis/Group.to_hdf - removed Metadata.to_hdf and Metadata.from_hdf - renamed PandasHDFHandler as HDFHandler
1 parent 40c8dd9 commit 03e2fc4

File tree

11 files changed

+660
-165
lines changed

11 files changed

+660
-165
lines changed

doc/source/api.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,25 @@ ReportSheet
706706
ReportSheet.add_graphs
707707
ReportSheet.newline
708708

709+
HDF
710+
===
711+
712+
.. autosummary::
713+
:toctree: _generated/
714+
715+
LHDFStore
716+
717+
.. autosummary::
718+
:toctree: _generated/
719+
720+
LHDFStore.filename
721+
LHDFStore.is_open
722+
LHDFStore.keys
723+
LHDFStore.items
724+
LHDFStore.summary
725+
LHDFStore.close
726+
727+
709728
.. _api-misc:
710729

711730
Miscellaneous

larray/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from larray.inout.pandas import from_frame, from_series
2727
from larray.inout.csv import read_csv, read_tsv, read_eurostat
2828
from larray.inout.excel import read_excel
29-
from larray.inout.hdf import read_hdf
29+
from larray.inout.hdf import read_hdf, LHDFStore
3030
from larray.inout.sas import read_sas
3131
from larray.inout.stata import read_stata
3232
from larray.inout.xw_excel import open_excel, Workbook
@@ -78,6 +78,7 @@
7878
'from_lists', 'from_string', 'from_frame', 'from_series', 'read_csv', 'read_tsv',
7979
'read_eurostat', 'read_excel', 'read_hdf', 'read_sas', 'read_stata',
8080
'open_excel', 'Workbook', 'ExcelReport', 'ReportSheet',
81+
'LHDFStore',
8182
# utils
8283
'get_options', 'set_options',
8384
# viewer

larray/core/array.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis
6363
from larray.util.misc import (table2str, size2str, basestring, izip, rproduct, ReprString, duplicates,
6464
float_error_handler_factory, _isnoneslice, light_product, unique_list, common_type,
65-
renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip,
65+
renamed_to, deprecate_kwarg, lazy_attribute, unique_multi, SequenceZip,
6666
Repeater, Product, ensure_no_numpy_type, PY2)
6767
from larray.util.options import _OPTIONS, DISPLAY_MAXLINES, DISPLAY_EDGEITEMS, DISPLAY_WIDTH, DISPLAY_PRECISION
6868

@@ -6734,13 +6734,9 @@ def to_hdf(self, filepath, key):
67346734
67356735
>>> a.to_hdf('test.h5', 'arrays/a') # doctest: +SKIP
67366736
"""
6737-
key = _translate_group_key_hdf(key)
6737+
from larray.inout.hdf import LHDFStore
67386738
with LHDFStore(filepath) as store:
6739-
store.put(key, self.to_frame())
6740-
attrs = store.get_storer(key).attrs
6741-
attrs.type = 'Array'
6742-
attrs.writer = 'LArray'
6743-
self.meta.to_hdf(store, key)
6739+
store.put(key, self)
67446740

67456741
def to_stata(self, filepath_or_buffer, **kwargs):
67466742
r"""

larray/core/axis.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
_range_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups)
1717
from larray.util.oset import *
1818
from larray.util.misc import (basestring, PY2, unicode, long, duplicates, array_lookup2, ReprString, index_by_id,
19-
renamed_to, common_type, LHDFStore, lazy_attribute, _isnoneslice, unique_multi, Product)
19+
renamed_to, common_type, lazy_attribute, _isnoneslice, unique_multi, Product)
2020

2121

2222
np_frompyfunc = np.frompyfunc
@@ -1344,19 +1344,13 @@ def to_hdf(self, filepath, key=None):
13441344
13451345
>>> a.to_hdf('test.h5', 'axes/a') # doctest: +SKIP
13461346
"""
1347+
from larray.inout.hdf import LHDFStore
13471348
if key is None:
13481349
if self.name is None:
13491350
raise ValueError("Argument key must be provided explicitly in case of anonymous axis")
13501351
key = self.name
1351-
key = _translate_group_key_hdf(key)
1352-
dtype_kind = self.labels.dtype.kind
1353-
data = np.char.encode(self.labels, 'utf-8') if dtype_kind == 'U' else self.labels
1354-
s = pd.Series(data=data, name=self.name)
13551352
with LHDFStore(filepath) as store:
1356-
store.put(key, s)
1357-
store.get_storer(key).attrs.type = 'Axis'
1358-
store.get_storer(key).attrs.dtype_kind = dtype_kind
1359-
store.get_storer(key).attrs.wildcard = self.iswildcard
1353+
store.put(key, self)
13601354

13611355
@property
13621356
def dtype(self):

larray/core/group.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCLArray
1414
from larray.util.oset import *
1515
from larray.util.misc import (basestring, PY2, unique, find_closing_chr, _parse_bound, _seq_summary, _isintstring,
16-
renamed_to, LHDFStore)
16+
renamed_to)
1717

1818

1919
def _slice_to_str(key, repr_func=str):
@@ -1453,27 +1453,13 @@ def to_hdf(self, filepath, key=None, axis_key=None):
14531453
>>> # save both the group 'b01' and the associated axis 'b'
14541454
>>> b01.to_hdf('test.h5') # doctest: +SKIP
14551455
"""
1456+
from larray.inout.hdf import LHDFStore
14561457
if key is None:
14571458
if self.name is None:
14581459
raise ValueError("Argument key must be provided explicitly in case of anonymous group")
14591460
key = self.name
1460-
key = _translate_group_key_hdf(key)
1461-
if axis_key is None:
1462-
if self.axis.name is None:
1463-
raise ValueError("Argument axis_key must be provided explicitly if the associated axis is anonymous")
1464-
axis_key = self.axis.name
1465-
data = self.eval()
1466-
dtype_kind = data.dtype.kind if isinstance(data, np.ndarray) else ''
1467-
if dtype_kind == 'U':
1468-
data = np.char.encode(data, 'utf-8')
1469-
s = pd.Series(data=data, name=self.name)
14701461
with LHDFStore(filepath) as store:
1471-
store.put(key, s)
1472-
store.get_storer(key).attrs.type = 'Group'
1473-
store.get_storer(key).attrs.dtype_kind = dtype_kind
1474-
if axis_key not in store:
1475-
self.axis.to_hdf(store, key=axis_key)
1476-
store.get_storer(key).attrs.axis_key = axis_key
1462+
store.put(key, self, axis_key=axis_key)
14771463

14781464
# this makes range(LGroup(int)) possible
14791465
def __index__(self):

larray/core/metadata.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -162,17 +162,3 @@ def _convert_value(value):
162162
return value
163163

164164
return Metadata([(key, _convert_value(value)) for key, value in zip(array.axes.labels[0], array.data)])
165-
166-
# ---------- IO methods ----------
167-
def to_hdf(self, hdfstore, key=None):
168-
if len(self):
169-
attrs = hdfstore.get_storer(key).attrs if key is not None else hdfstore.root._v_attrs
170-
attrs.metadata = self
171-
172-
@classmethod
173-
def from_hdf(cls, hdfstore, key=None):
174-
attrs = hdfstore.get_storer(key).attrs if key is not None else hdfstore.root._v_attrs
175-
if 'metadata' in attrs:
176-
return attrs.metadata
177-
else:
178-
return None

larray/core/session.py

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,41 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, division, print_function
33

4+
import fnmatch
45
import os
5-
import sys
66
import re
7-
import fnmatch
7+
import sys
88
import warnings
99
from collections import OrderedDict, Iterable
1010

1111
import numpy as np
1212

13-
from larray.core.metadata import Metadata
14-
from larray.core.group import Group
13+
from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence
1514
from larray.core.axis import Axis
1615
from larray.core.constants import nan
17-
from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence, aslarray
18-
from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop, basestring
16+
from larray.core.group import Group
17+
from larray.core.metadata import Metadata
1918
from larray.inout.session import ext_default_engine, get_file_handler
19+
from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop, basestring
20+
21+
22+
def _get_handler(engine, fname, overwrite, **kwargs):
    """Instantiate the file handler to use for `fname`.

    Parameters
    ----------
    engine : str
        Name of the engine. 'auto' means the engine is looked up in `ext_default_engine` using the extension
        of `fname` (a name without extension is treated as 'csv').
        A name containing '_hdf' is split on '_': the first part is passed to the HDF handler as its `engine`
        keyword argument and the second part selects the handler class.
    fname : str
        Path of the file (or directory) the handler will work on.
    overwrite : bool
        Forwarded as is to the handler constructor.
    **kwargs
        Only 'sep' is used here: when present, it is forwarded to the 'pandas_csv' handler.

    Returns
    -------
    Instance of the handler class returned by `get_file_handler`.
    """
    if engine == 'auto':
        # guess the engine from the file extension
        extension = os.path.splitext(fname)[1]
        extension = extension.strip('.') if '.' in extension else 'csv'
        engine = ext_default_engine[extension]

    # 'hdf' never contains '_hdf', so the two cases below are mutually exclusive
    if engine == 'hdf':
        engine_hdf = 'auto'
    elif '_hdf' in engine:
        engine_hdf, engine = engine.split('_')

    handler_cls = get_file_handler(engine)
    if engine == 'hdf':
        return handler_cls(fname, overwrite, engine=engine_hdf)
    if engine == 'pandas_csv' and 'sep' in kwargs:
        return handler_cls(fname, overwrite, kwargs['sep'])
    return handler_cls(fname, overwrite)
2039

2140

2241
# XXX: inherit from OrderedDict or LArray?
@@ -358,7 +377,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
358377
List of objects to load.
359378
If `fname` is None, list of paths to CSV files.
360379
Defaults to all valid objects present in the file/directory.
361-
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
380+
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'tables_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
362381
Load using `engine`. Defaults to 'auto' (use default engine for the format guessed from the file extension).
363382
display : bool, optional
364383
Whether or not to display which file is being worked on. Defaults to False.
@@ -415,15 +434,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
415434
engine = ext_default_engine['csv']
416435
else:
417436
raise ValueError("List of paths to only CSV files expected. Got {}".format(names))
418-
if engine == 'auto':
419-
_, ext = os.path.splitext(fname)
420-
ext = ext.strip('.') if '.' in ext else 'csv'
421-
engine = ext_default_engine[ext]
422-
handler_cls = get_file_handler(engine)
423-
if engine == 'pandas_csv' and 'sep' in kwargs:
424-
handler = handler_cls(fname, kwargs['sep'])
425-
else:
426-
handler = handler_cls(fname)
437+
handler = _get_handler(engine, fname, False, **kwargs)
427438
metadata, objects = handler.read(names, display=display, **kwargs)
428439
for k, v in objects.items():
429440
self[k] = v
@@ -442,7 +453,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
442453
List of names of LArray/Axis/Group objects to dump.
443454
If `fname` is None, list of paths to CSV files.
444455
Defaults to all objects present in the Session.
445-
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
456+
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'tables_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
446457
Dump using `engine`. Defaults to 'auto' (use default engine for the format guessed from the file extension).
447458
overwrite: bool, optional
448459
Whether or not to overwrite an existing file, if any. Ignored for CSV files and 'pandas_excel' engine.
@@ -482,15 +493,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
482493
>>> # replace arr1 and add arr4 in file output.h5
483494
>>> s2.save('output.h5', overwrite=False) # doctest: +SKIP
484495
"""
485-
if engine == 'auto':
486-
_, ext = os.path.splitext(fname)
487-
ext = ext.strip('.') if '.' in ext else 'csv'
488-
engine = ext_default_engine[ext]
489-
handler_cls = get_file_handler(engine)
490-
if engine == 'pandas_csv' and 'sep' in kwargs:
491-
handler = handler_cls(fname, overwrite, kwargs['sep'])
492-
else:
493-
handler = handler_cls(fname, overwrite)
496+
handler = _get_handler(engine, fname, overwrite, **kwargs)
494497
meta = self.meta if overwrite else None
495498
items = self.items()
496499
if names is not None:

0 commit comments

Comments
 (0)