Skip to content

Commit fd666c6

Browse files
committed
- added a module to generate the files contained in the tests/data/ directory
- renamed the 'population_session' directory and files to 'demography_eurostat' - made 'demography_eurostat' available as a new dataset in function load_example_data() - fix larray-project#785
1 parent 2025e56 commit fd666c6

38 files changed

+455
-73
lines changed

doc/source/tutorial/tutorial_IO.ipyml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -574,17 +574,17 @@ cells:
574574
- code: |
575575
# create a new Session object and load all arrays, axes, groups and metadata
576576
# from all CSV files located in the passed directory
577-
csv_dir = get_example_filepath('population_session')
577+
csv_dir = get_example_filepath('demography_eurostat')
578578
session = Session(csv_dir)
579579

580580
# create a new Session object and load all arrays, axes, groups and metadata
581581
# stored in the passed Excel file
582-
filepath_excel = get_example_filepath('population_session.xlsx')
582+
filepath_excel = get_example_filepath('demography_eurostat.xlsx')
583583
session = Session(filepath_excel)
584584

585585
# create a new Session object and load all arrays, axes, groups and metadata
586586
# stored in the passed HDF5 file
587-
filepath_hdf = get_example_filepath('population_session.h5')
587+
filepath_hdf = get_example_filepath('demography_eurostat.h5')
588588
session = Session(filepath_hdf)
589589

590590
print(session.summary())

doc/source/tutorial/tutorial_IO.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -835,17 +835,17 @@
835835
"source": [
836836
"# create a new Session object and load all arrays, axes, groups and metadata \n",
837837
"# from all CSV files located in the passed directory\n",
838-
"csv_dir = get_example_filepath('population_session')\n",
838+
"csv_dir = get_example_filepath('demography_eurostat')\n",
839839
"session = Session(csv_dir)\n",
840840
"\n",
841841
"# create a new Session object and load all arrays, axes, groups and metadata\n",
842842
"# stored in the passed Excel file\n",
843-
"filepath_excel = get_example_filepath('population_session.xlsx')\n",
843+
"filepath_excel = get_example_filepath('demography_eurostat.xlsx')\n",
844844
"session = Session(filepath_excel)\n",
845845
"\n",
846846
"# create a new Session object and load all arrays, axes, groups and metadata\n",
847847
"# stored in the passed HDF5 file\n",
848-
"filepath_hdf = get_example_filepath('population_session.h5')\n",
848+
"filepath_hdf = get_example_filepath('demography_eurostat.h5')\n",
849849
"session = Session(filepath_hdf)\n",
850850
"\n",
851851
"print(session.summary())"

doc/source/tutorial/tutorial_sessions.ipyml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ cells:
4343

4444
- code: |
4545
# load a session representing the results of a demographic model
46-
filepath_hdf = get_example_filepath('population_session.h5')
46+
filepath_hdf = get_example_filepath('demography_eurostat.h5')
4747
s_pop = Session(filepath_hdf)
4848

4949
# print the content of the session
@@ -188,7 +188,7 @@ cells:
188188

189189
- code: |
190190
# load a session representing the results of a demographic model
191-
filepath_hdf = get_example_filepath('population_session.h5')
191+
filepath_hdf = get_example_filepath('demography_eurostat.h5')
192192
s_pop = Session(filepath_hdf)
193193

194194
# create a copy of the original session

doc/source/tutorial/tutorial_sessions.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
"outputs": [],
7979
"source": [
8080
"# load a session representing the results of a demographic model\n",
81-
"filepath_hdf = get_example_filepath('population_session.h5')\n",
81+
"filepath_hdf = get_example_filepath('demography_eurostat.h5')\n",
8282
"s_pop = Session(filepath_hdf)\n",
8383
"\n",
8484
"# print the content of the session\n",
@@ -319,7 +319,7 @@
319319
"outputs": [],
320320
"source": [
321321
"# load a session representing the results of a demographic model\n",
322-
"filepath_hdf = get_example_filepath('population_session.h5')\n",
322+
"filepath_hdf = get_example_filepath('demography_eurostat.h5')\n",
323323
"s_pop = Session(filepath_hdf)\n",
324324
"\n",
325325
"# create a copy of the original session\n",

larray/example.py

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@
55
_TEST_DIR = os.path.join(os.path.dirname(__file__), 'tests')
66

77
EXAMPLE_FILES_DIR = os.path.join(_TEST_DIR, 'data')
8-
# TODO : replace 'demography.h5' by 'population_session.h5' and remove 'demo' ?
98
AVAILABLE_EXAMPLE_DATA = {
10-
'demo': os.path.join(EXAMPLE_FILES_DIR, 'population_session.h5'),
11-
'demography': os.path.join(EXAMPLE_FILES_DIR, 'demography.h5')
9+
'demography': os.path.join(EXAMPLE_FILES_DIR, 'demography.h5'),
10+
'demography_eurostat': os.path.join(EXAMPLE_FILES_DIR, 'demography_eurostat.h5')
1211
}
1312
AVAILABLE_EXAMPLE_FILES = os.listdir(EXAMPLE_FILES_DIR)
1413

@@ -43,6 +42,7 @@ def get_example_filepath(fname):
4342
return fpath
4443

4544

45+
# TODO : replace # doctest: +SKIP by # doctest: +NORMALIZE_WHITESPACE once Python 2 has been dropped
4646
def load_example_data(name):
4747
r"""Load arrays used in the tutorial so that all examples in it can be reproduced.
4848
@@ -52,29 +52,36 @@ def load_example_data(name):
5252
Example data to load. Available example datasets are:
5353
5454
- demography
55+
- demography_eurostat
5556
5657
Returns
5758
-------
5859
Session
59-
Session containing one or several arrays
60+
Session containing one or several arrays.
6061
6162
Examples
6263
--------
6364
>>> demo = load_example_data('demography')
64-
>>> demo.pop.info # doctest: +SKIP
65-
26 x 3 x 121 x 2 x 2
66-
time [26]: 1991 1992 1993 ... 2014 2015 2016
67-
geo [3]: 'BruCap' 'Fla' 'Wal'
68-
age [121]: 0 1 2 ... 118 119 120
69-
sex [2]: 'M' 'F'
70-
nat [2]: 'BE' 'FO'
71-
>>> demo.qx.info # doctest: +SKIP
72-
26 x 3 x 121 x 2 x 2
73-
time [26]: 1991 1992 1993 ... 2014 2015 2016
74-
geo [3]: 'BruCap' 'Fla' 'Wal'
75-
age [121]: 0 1 2 ... 118 119 120
76-
sex [2]: 'M' 'F'
77-
nat [2]: 'BE' 'FO'
65+
>>> print(demo.summary()) # doctest: +NORMALIZE_WHITESPACE
66+
hh: time, geo, hh_type (26 x 3 x 7) [int64]
67+
pop: time, geo, age, sex, nat (26 x 3 x 121 x 2 x 2) [int64]
68+
qx: time, geo, age, sex, nat (26 x 3 x 121 x 2 x 2) [float64]
69+
>>> demo = load_example_data('demography_eurostat')
70+
>>> print(demo.summary()) # doctest: +SKIP
71+
Metadata:
72+
title: Demographic datasets for a small selection of countries in Europe
73+
source: demo_jpan, demo_fasec, demo_magec and demo_marcz tables from Eurostat
74+
citizen: citizen ['Total' 'Reporting_country' 'Foreign' 'Stateless' 'Unknown'] (5)
75+
country: country ['Belgium' 'France' 'Germany'] (3)
76+
gender: gender ['Male' 'Female'] (2)
77+
partner: partner ['Total' 'Reporting_country' 'Foreign' 'Stateless' 'Unknown'] (5)
78+
time: time [2013 2014 2015] (3)
79+
even_years: time[2014] >> even_years (1)
80+
odd_years: time[2013 2015] >> odd_years (2)
81+
births: country, gender, time (3 x 2 x 3) [int32]
82+
deaths: country, gender, time (3 x 2 x 3) [int32]
83+
marriages: country, partner, citizen, time (3 x 5 x 5 x 3) [int32]
84+
pop: country, gender, time (3 x 2 x 3) [int32]
7885
"""
7986
if name is None:
8087
name = 'demography'

larray/inout/csv.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
7878
country,gender\time,2013,2014,2015
7979
Belgium,Male,5472856,5493792,5524068
8080
Belgium,Female,5665118,5687048,5713206
81-
France,Male,31772665,31936596,32175328
82-
France,Female,33827685,34005671,34280951
81+
France,Male,31772665,32045129,32174258
82+
France,Female,33827685,34120851,34283895
8383
Germany,Male,39380976,39556923,39835457
8484
Germany,Female,41142770,41210540,41362080
8585
@@ -93,8 +93,8 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
9393
country gender\time 2013 2014 2015
9494
Belgium Male 5472856 5493792 5524068
9595
Belgium Female 5665118 5687048 5713206
96-
France Male 31772665 31936596 32175328
97-
France Female 33827685 34005671 34280951
96+
France Male 31772665 32045129 32174258
97+
France Female 33827685 34120851 34283895
9898
Germany Male 39380976 39556923 39835457
9999
Germany Female 41142770 41210540 41362080
100100
@@ -108,7 +108,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
108108
country,gender\time,2013,2014,2015
109109
Belgium,Male,5472856,5493792,5524068
110110
Belgium,Female,5665118,5687048,5713206
111-
France,Female,33827685,34005671,34280951
111+
France,Female,33827685,34120851,34283895
112112
Germany,Male,39380976,39556923,39835457
113113
>>> # by default, cells associated with missing label combinations are filled with NaN.
114114
>>> # In that case, an int array is converted to a float array.
@@ -117,7 +117,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
117117
Belgium Male 5472856.0 5493792.0 5524068.0
118118
Belgium Female 5665118.0 5687048.0 5713206.0
119119
France Male nan nan nan
120-
France Female 33827685.0 34005671.0 34280951.0
120+
France Female 33827685.0 34120851.0 34283895.0
121121
Germany Male 39380976.0 39556923.0 39835457.0
122122
Germany Female nan nan nan
123123
>>> # using argument 'fill_value', you can choose which value to use to fill missing cells.
@@ -126,7 +126,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
126126
Belgium Male 5472856 5493792 5524068
127127
Belgium Female 5665118 5687048 5713206
128128
France Male 0 0 0
129-
France Female 33827685 34005671 34280951
129+
France Female 33827685 34120851 34283895
130130
Germany Male 39380976 39556923 39835457
131131
Germany Female 0 0 0
132132
@@ -140,8 +140,8 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
140140
country,gender,2013,2014,2015
141141
Belgium,Male,5472856,5493792,5524068
142142
Belgium,Female,5665118,5687048,5713206
143-
France,Male,31772665,31936596,32175328
144-
France,Female,33827685,34005671,34280951
143+
France,Male,31772665,32045129,32174258
144+
France,Female,33827685,34120851,34283895
145145
Germany,Male,39380976,39556923,39835457
146146
Germany,Female,41142770,41210540,41362080
147147
>>> # read the array stored in the CSV file as is
@@ -177,13 +177,13 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
177177
Belgium,2014,11180840
178178
Belgium,2015,11237274
179179
France,2013,65600350
180-
France,2014,65942267
181-
France,2015,66456279
180+
France,2014,66165980
181+
France,2015,66458153
182182
>>> # to read arrays stored in 'narrow' format, you must pass wide=False to read_csv
183183
>>> read_csv(fname, wide=False)
184184
country\time 2013 2014 2015
185185
Belgium 11137974 11180840 11237274
186-
France 65600350 65942267 66456279
186+
France 65600350 66165980 66458153
187187
"""
188188
if not np.isnan(na):
189189
fill_value = na

larray/inout/excel.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
8484
country gender\time 2013 2014 2015
8585
Belgium Male 5472856 5493792 5524068
8686
Belgium Female 5665118 5687048 5713206
87-
France Male 31772665 31936596 32175328
88-
France Female 33827685 34005671 34280951
87+
France Male 31772665 32045129 32174258
88+
France Female 33827685 34120851 34283895
8989
Germany Male 39380976 39556923 39835457
9090
Germany Female 41142770 41210540 41362080
9191
@@ -109,7 +109,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
109109
country gender\time 2013 2014 2015
110110
Belgium Male 5472856 5493792 5524068
111111
Belgium Female 5665118 5687048 5713206
112-
France Female 33827685 34005671 34280951
112+
France Female 33827685 34120851 34283895
113113
Germany Male 39380976 39556923 39835457
114114
115115
By default, cells associated with missing label combinations are filled with NaN. In that case, an int array
@@ -120,7 +120,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
120120
Belgium Male 5472856.0 5493792.0 5524068.0
121121
Belgium Female 5665118.0 5687048.0 5713206.0
122122
France Male nan nan nan
123-
France Female 33827685.0 34005671.0 34280951.0
123+
France Female 33827685.0 34120851.0 34283895.0
124124
Germany Male 39380976.0 39556923.0 39835457.0
125125
Germany Female nan nan nan
126126
@@ -131,7 +131,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
131131
Belgium Male 5472856 5493792 5524068
132132
Belgium Female 5665118 5687048 5713206
133133
France Male 0 0 0
134-
France Female 33827685 34005671 34280951
134+
France Female 33827685 34120851 34283895
135135
Germany Male 39380976 39556923 39835457
136136
Germany Female 0 0 0
137137
@@ -142,8 +142,8 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
142142
country gender 2013 2014 2015
143143
Belgium Male 5472856 5493792 5524068
144144
Belgium Female 5665118 5687048 5713206
145-
France Male 31772665 31936596 32175328
146-
France Female 33827685 34005671 34280951
145+
France Male 31772665 32045129 32174258
146+
France Female 33827685 34120851 34283895
147147
Germany Male 39380976 39556923 39835457
148148
Germany Female 41142770 41210540 41362080
149149
@@ -177,14 +177,14 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
177177
Belgium 2014 11180840
178178
Belgium 2015 11237274
179179
France 2013 65600350
180-
France 2014 65942267
181-
France 2015 66456279
180+
France 2014 66165980
181+
France 2015 66458153
182182
183183
>>> # to read arrays stored in 'narrow' format, you must pass wide=False to read_excel
184184
>>> read_excel(fname, 'pop_narrow_format', wide=False)
185185
country\time 2013 2014 2015
186186
Belgium 11137974 11180840 11237274
187-
France 65600350 65942267 66456279
187+
France 65600350 66165980 66458153
188188
189189
Extract array from a given range (xlwings only)
190190

larray/inout/hdf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s
5757
country gender\time 2013 2014 2015
5858
Belgium Male 5472856 5493792 5524068
5959
Belgium Female 5665118 5687048 5713206
60-
France Male 31772665 31936596 32175328
61-
France Female 33827685 34005671 34280951
60+
France Male 31772665 32045129 32174258
61+
France Female 33827685 34120851 34283895
6262
Germany Male 39380976 39556923 39835457
6363
Germany Female 41142770 41210540 41362080
6464
"""

larray/inout/xw_reporting.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def template(self):
7979
8080
Examples
8181
--------
82-
>>> demo = load_example_data('demo')
82+
>>> demo = load_example_data('demography_eurostat')
8383
8484
Passing the name of the template (only if a template directory has been set)
8585
@@ -245,7 +245,7 @@ def add_graph(self, data, title=None, template=None, width=None, height=None):
245245
246246
Examples
247247
--------
248-
>>> demo = load_example_data('demo')
248+
>>> demo = load_example_data('demography_eurostat')
249249
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
250250
251251
>>> sheet_be = report.new_sheet('Belgium')
@@ -300,7 +300,7 @@ def add_graphs(self, array_per_title, axis_per_loop_variable, template=None, wid
300300
301301
Examples
302302
--------
303-
>>> demo = load_example_data('demo')
303+
>>> demo = load_example_data('demography_eurostat')
304304
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
305305
306306
>>> sheet_pop = report.new_sheet('Population')
@@ -353,7 +353,7 @@ class AbstractExcelReport(AbstractReportItem):
353353
354354
Examples
355355
--------
356-
>>> demo = load_example_data('demo')
356+
>>> demo = load_example_data('demography_eurostat')
357357
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
358358
359359
Set a new destination sheet
@@ -428,7 +428,7 @@ def new_sheet(self, sheet_name):
428428
429429
Examples
430430
--------
431-
>>> demo = load_example_data('demo')
431+
>>> demo = load_example_data('demography_eurostat')
432432
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
433433
434434
>>> # prepare new output sheet named 'Belgium'
@@ -471,7 +471,7 @@ def to_excel(self, filepath, data_sheet_name='__data__', overwrite=True):
471471
472472
Examples
473473
--------
474-
>>> demo = load_example_data('demo')
474+
>>> demo = load_example_data('demography_eurostat')
475475
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
476476
>>> report.template = 'Line_Marker'
477477
14 Bytes
Binary file not shown.
48.6 KB
Binary file not shown.
15.8 KB
Binary file not shown.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
country,gender,time,partner,citizen
2+
Belgium,Male,2013,Total,Total
3+
France,Female,2014,Reporting_country,Reporting_country
4+
Germany,,2015,Foreign,Foreign
5+
,,,Stateless,Stateless
6+
,,,Unknown,Unknown
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
metadata,
2+
title,Demographic datasets for a small selection of countries in Europe
3+
source,"demo_jpan, demo_fasec, demo_magec and demo_marcz tables from Eurostat"

0 commit comments

Comments
 (0)