Skip to content

Commit 51b95fa

Browse files
committed
Refactoring of _pyco_tree:
* Improved compile time using type erasure. * Added the save_kd_tree and load_kd_tree functions. * Replaced several std::runtime_error exceptions by std::invalid_argument. Updated the minimum Python version to 3.10. Avoid global namespace redeclaration of index error with GNU extensions.
1 parent 69977bf commit 51b95fa

File tree

22 files changed

+699
-385
lines changed

22 files changed

+699
-385
lines changed

.github/workflows/pip.yml

+11-10
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,26 @@ jobs:
1818
- name: Setup Python
1919
uses: actions/setup-python@v4
2020
with:
21-
python-version: "3.7"
21+
python-version: "3.10"
2222

23-
# On macos-latest, OpenMP is already installed but not visible to CMake.
24-
# Library libomp 15.0+ has been made keg-only.
25-
# https://github.com/Homebrew/homebrew-core/issues/112107#issuecomment-1278042927
23+
# On macos-latest, OpenMP is neither installed nor visible to CMake.
2624
- name: Setup OpenMP for macOS
2725
if: runner.os == 'macOS'
28-
run: brew link --force libomp
26+
run: |
27+
brew install libomp
28+
brew link --force libomp
2929
3030
# The global version of pybind11 allows us to find it via CMake.
3131
- name: Update setup related tools
3232
run: |
33-
python -m pip install --upgrade wheel setuptools
34-
pip install ninja
35-
pip install scikit-build
36-
pip install pybind11-global
33+
python -m pip install wheel
34+
python -m pip install setuptools
35+
python -m pip install ninja
36+
python -m pip install scikit-build
37+
python -m pip install pybind11-global
3738
3839
- name: Install with pip
39-
run: pip install ./ -v
40+
run: python -m pip install ./ -v
4041

4142
- name: Test Python bindings
4243
run: python -m unittest discover -s ./test/pyco_tree -p '*_test.py' -v

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ Optional:
8080
* [Google Benchmark](https://github.com/google/benchmark) is needed to run any of the benchmarks. The [nanoflann](https://github.com/jlblancoc/nanoflann) and [OpenCV](https://opencv.org/) benchmarks also require their respective libraries to be installed.
8181

8282
Python bindings:
83-
* [Python](https://www.python.org/). Version 3.7 or higher.
83+
* [Python](https://www.python.org/). Version 3.10 or higher.
8484
* [pybind11](https://github.com/pybind/pybind11). Used to ease the creation of Python bindings. Available under the [BSD](https://github.com/pybind/pybind11/blob/master/LICENSE) license and copyright.
8585
* [OpenMP](https://www.openmp.org/). For parallelization of queries.
8686
* [numpy](https://numpy.org/). Points and search results are represented by ndarrays.

examples/eigen/eigen.cpp

+8-4
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,16 @@ void col_major_support() {
8484
using kd_tree = pico_tree::kd_tree<map>;
8585
using neighbor = typename kd_tree::neighbor_type;
8686
using scalar = typename point::Scalar;
87-
constexpr int dim = point::RowsAtCompileTime;
87+
constexpr Eigen::Index dim = point::RowsAtCompileTime;
8888

8989
auto points = generate_random_eigen_n<point>(num_points, point_area);
9090
point p = point::Random() * point_area / scalar(2.0);
9191

9292
std::cout << "Eigen RowMajor: " << map::IsRowMajor << std::endl;
9393

94-
kd_tree tree(map(points.data()->data(), dim, points.size()), max_leaf_count);
94+
kd_tree tree(
95+
map(points.data()->data(), dim, static_cast<Eigen::Index>(num_points)),
96+
max_leaf_count);
9597

9698
std::vector<neighbor> knn;
9799
pico_tree::scoped_timer t("pico_tree col major", run_count);
@@ -108,14 +110,16 @@ void row_major_support() {
108110
using kd_tree = pico_tree::kd_tree<map>;
109111
using neighbor = typename kd_tree::neighbor_type;
110112
using scalar = typename point::Scalar;
111-
constexpr int dim = point::ColsAtCompileTime;
113+
constexpr Eigen::Index dim = point::ColsAtCompileTime;
112114

113115
auto points = generate_random_eigen_n<point>(num_points, point_area);
114116
point p = point::Random() * point_area / scalar(2.0);
115117

116118
std::cout << "Eigen RowMajor: " << point::IsRowMajor << std::endl;
117119

118-
kd_tree tree(map(points.data()->data(), points.size(), dim), max_leaf_count);
120+
kd_tree tree(
121+
map(points.data()->data(), static_cast<Eigen::Index>(num_points), dim),
122+
max_leaf_count);
119123

120124
std::vector<neighbor> knn;
121125
pico_tree::scoped_timer t("pico_tree row major", run_count);

examples/kd_tree/kd_tree_search.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ void search_s1() {
9393
std::cout << "Closest angles (index, distance, value): " << std::endl;
9494
for (auto const& nn : knn) {
9595
std::cout << " " << nn.index << ", " << nn.distance << ", "
96-
<< tree.space()[nn.index] << std::endl;
96+
<< tree.space()[static_cast<std::size_t>(nn.index)] << std::endl;
9797
}
9898
}
9999

examples/opencv/opencv.cpp

+8-6
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,8 @@
44
#include <pico_tree/vector_traits.hpp>
55
#include <random>
66

7-
using index = int;
8-
using scalar = float;
9-
10-
index const num_points = 1024 * 1024 * 2;
11-
scalar const point_area = 1000.0;
7+
std::size_t const num_points = 1024 * 1024 * 2;
8+
float const point_area = 1000.0;
129
std::size_t const run_count = 1024 * 1024;
1310

1411
template <typename Vec_>
@@ -30,6 +27,8 @@ std::vector<Vec_> generate_random_vec_n(
3027

3128
// This example shows how to build a kd_tree from a vector of cv::Point3.
3229
void basic_vector() {
30+
using index = int;
31+
using scalar = float;
3332
using point = cv::Vec<scalar, 3>;
3433
std::vector<point> random =
3534
generate_random_vec_n<point>(num_points, point_area);
@@ -47,6 +46,9 @@ void basic_vector() {
4746

4847
// This example shows how to build a kd_tree using a cv::Mat.
4948
void basic_matrix() {
49+
using index = int;
50+
using scalar = float;
51+
5052
// Multiple columns based on the number of coordinates in a point.
5153
{
5254
constexpr int dim = 3;
@@ -55,7 +57,7 @@ void basic_matrix() {
5557

5658
pico_tree::kd_tree<pico_tree::opencv_mat_map<scalar, dim>> tree(
5759
random, pico_tree::max_leaf_size_t(10));
58-
pico_tree::point_map<scalar, dim> p = tree.space()[random.rows / 2];
60+
pico_tree::point_map<scalar, dim> p = tree.space()[tree.space().size() / 2];
5961

6062
pico_tree::neighbor<index, scalar> nn;
6163
pico_tree::scoped_timer t("pico_tree cv mat", run_count);

examples/pico_toolshed/pico_toolshed/format/format_bin.hpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,24 @@ void write_bin(std::string const& filename, std::vector<T_> const& v) {
1111
return;
1212
}
1313

14-
std::size_t const element_size = sizeof(T_);
1514
std::fstream stream =
1615
internal::open_stream(filename, std::ios::out | std::ios::binary);
17-
stream.write(reinterpret_cast<char const*>(&v[0]), element_size * v.size());
16+
internal::stream_wrapper wrapper(stream);
17+
wrapper.write(v.data(), v.size());
1818
}
1919

2020
template <typename T_>
2121
void read_bin(std::string const& filename, std::vector<T_>& v) {
2222
std::fstream stream =
2323
internal::open_stream(filename, std::ios::in | std::ios::binary);
24+
internal::stream_wrapper wrapper(stream);
2425

2526
auto bytes = std::filesystem::file_size(filename);
2627
std::size_t const element_size = sizeof(T_);
2728
std::size_t const element_count =
2829
static_cast<std::size_t>(bytes) / element_size;
2930
v.resize(element_count);
30-
stream.read(reinterpret_cast<char*>(&v[0]), element_size * element_count);
31+
wrapper.read(element_count, v.data());
3132
}
3233

3334
} // namespace pico_tree

examples/pico_toolshed/pico_toolshed/format/format_mnist.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ inline void read_mnist_images(
5454
throw std::runtime_error("unexpected mnist image dimensions");
5555
}
5656

57-
images.resize(header.image_count);
57+
images.resize(static_cast<std::size_t>(header.image_count));
5858
wrapper.read(images.size(), images.data());
5959
}
6060

@@ -75,7 +75,7 @@ inline void read_mnist_labels(
7575
wrapper.read(header);
7676
header.label_count = big_endian<std::int32_t>{header.label_count};
7777

78-
labels.resize(header.label_count);
78+
labels.resize(static_cast<std::size_t>(header.label_count));
7979
wrapper.read(labels.size(), labels.data());
8080
}
8181

examples/pico_understory/pico_understory/internal/cover_tree_builder.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class build_cover_tree_impl {
4040
std::shuffle(indices.begin(), indices.end(), g);
4141

4242
node_type* node = insert_first_two(indices);
43-
for (index_type i = 2; i < static_cast<index_type>(npts); ++i) {
43+
for (size_t i = 2; i < npts; ++i) {
4444
node = insert(node, create_node(indices[i]));
4545
}
4646

examples/pico_understory/pico_understory/internal/kd_tree_priority_search.hpp

+6-7
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,16 @@ class priority_search_nearest_euclidean {
6868
inline void search_nearest(
6969
node_type const* const node, scalar_type node_box_distance) {
7070
if (node->is_leaf()) {
71-
for (index_type i = node->data.leaf.begin_idx;
72-
i < node->data.leaf.end_idx;
73-
++i) {
74-
visitor_(
75-
indices_[i],
76-
metric_(query_.begin(), query_.end(), space_[indices_[i]]));
71+
auto begin = indices_.begin() + node->data.leaf.begin_idx;
72+
auto const end = indices_.begin() + node->data.leaf.end_idx;
73+
for (; begin < end; ++begin) {
74+
visitor_(*begin, metric_(query_.begin(), query_.end(), space_[*begin]));
7775
}
7876
} else {
7977
// Go left or right and then check if we should still go down the other
8078
// side based on the current minimum distance.
81-
scalar_type const v = query_[node->data.branch.split_dim];
79+
scalar_type const v =
80+
query_[static_cast<size_t>(node->data.branch.split_dim)];
8281
scalar_type old_offset;
8382
scalar_type new_offset;
8483
node_type const* node_1st;

examples/python/kd_tree.py

+35-8
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
#!/usr/bin/env python3
22

3-
import pico_tree as pt
3+
import os
44
import numpy as np
5+
import pico_tree as pt
56
from pathlib import Path
67
from time import perf_counter
78
# from scipy.spatial import KDTree as spKDTree
89
# from sklearn.neighbors import KDTree as skKDTree
910
# from pykdtree.kdtree import KDTree as pyKDTree
1011

1112

12-
def tree_creation_and_query_types():
13+
def kd_tree_creation_and_query():
1314
print("*** KdTree Creation And Basic Information ***")
1415
# An input array must have a dimension of two and it must be
1516
# contiguous. A C contiguous array contains points in its rows and
@@ -71,9 +72,9 @@ def tree_creation_and_query_types():
7172
print()
7273

7374
print("*** Box Search ***")
74-
# A box search returns the same data structure as a radius search.
75-
# However, instead of containing neighbors it simply contains
76-
# indices.
75+
# A box search returns the same data structure as a radius
76+
# search. However, instead of containing neighbors it simply
77+
# contains indices.
7778
# An array of input boxes is defined as follows:
7879
# [min_0, max_0, min_1, max_1, ...]
7980
boxes = np.array(
@@ -106,6 +107,31 @@ def tree_creation_and_query_types():
106107
print()
107108

108109

110+
def kd_tree_file_io():
111+
# The save_kd_tree and load_kd_tree functions are considered
112+
# convenience functions to save and load a kd_tree on a single
113+
# machine. Take the following into account when using them:
114+
# * Does not take memory endianness into account.
115+
# * Does not check if the stored tree structure is valid for the
116+
# given point set.
117+
118+
a = np.array([[2, 1], [4, 3], [8, 7]], dtype=np.float64, order='C')
119+
t1 = pt.KdTree(a, pt.Metric.L2Squared, 10)
120+
121+
filename = "tree.bin"
122+
# The save_kd_tree function *only* saves the tree structure, not the point
123+
# set that was used to create the KdTree.
124+
pt.save_kd_tree(t1, filename)
125+
# Loading the KdTree requires the original point set with which the
126+
# KdTree was created.
127+
t2 = pt.load_kd_tree(a, filename)
128+
os.remove(filename)
129+
130+
print(t1)
131+
print(t2)
132+
print()
133+
134+
109135
def array_initialization():
110136
print("*** Array Initialization ***")
111137
p = np.array([[2, 1], [4, 3], [8, 7]], dtype=np.float64)
@@ -126,7 +152,7 @@ def array_initialization():
126152
print()
127153

128154

129-
def performance_test_pico_tree():
155+
def kd_tree_performance_test():
130156
print("*** Performance against scans.bin ***")
131157
# The benchmark documentation, docs/benchmark.md section "Running a
132158
# new benchmark", explains how to generate a scans.bin file from an
@@ -175,9 +201,10 @@ def performance_test_pico_tree():
175201

176202

177203
def main():
178-
tree_creation_and_query_types()
204+
kd_tree_creation_and_query()
205+
kd_tree_file_io()
179206
array_initialization()
180-
performance_test_pico_tree()
207+
kd_tree_performance_test()
181208

182209

183210
if __name__ == "__main__":

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@
1313
packages=['pico_tree'],
1414
package_dir={'': 'src/pyco_tree'},
1515
cmake_install_dir='src/pyco_tree/pico_tree',
16-
python_requires='>=3.7.3',
16+
python_requires='>=3.10',
1717
install_requires=['numpy'],
1818
)

src/pico_tree/pico_tree/internal/stream_wrapper.hpp

+18-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include <fstream>
4+
#include <string>
45
#include <vector>
56

67
namespace pico_tree::internal {
@@ -46,6 +47,15 @@ class stream_wrapper {
4647
read(size, values.data());
4748
}
4849

50+
//! \brief Reads a string from the stream_wrapper.
51+
//! \details Reads the size of the string followed by all its elements.
52+
inline void read(std::string& values) {
53+
typename std::string::size_type size;
54+
read(size);
55+
values.resize(size);
56+
read(size, values.data());
57+
}
58+
4959
//! \brief Reads an array of values from the stream_wrapper.
5060
//! \tparam T_ Type of a value.
5161
template <typename T_>
@@ -70,9 +80,14 @@ class stream_wrapper {
7080
template <typename T_>
7181
inline void write(std::vector<T_> const& values) {
7282
write(values.size());
73-
stream_.write(
74-
reinterpret_cast<char const*>(&values[0]),
75-
static_cast<std::streamsize>(sizeof(T_) * values.size()));
83+
write(values.data(), values.size());
84+
}
85+
86+
//! \brief Writes a string to the stream_wrapper.
87+
//! \details Writes the size of the string followed by all its elements.
88+
inline void write(std::string const& values) {
89+
write(values.size());
90+
write(values.data(), values.size());
7691
}
7792

7893
//! \brief Writes an array of values to the stream_wrapper.

src/pyco_tree/pico_tree/__init__.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
__all__ = ['DArray', 'KdTree', 'Metric']
1+
__all__ = ['DArray', 'Metric', 'KdTree', 'load_kd_tree', 'save_kd_tree']
22

3-
from .kd_tree import KdTree
4-
from .metric import Metric
5-
from ._pyco_tree import __doc__, DArray
3+
# TODO Generate .pyi stub files for _pyco_tree.
4+
5+
from ._pyco_tree import __doc__, DArray, Metric, KdTree, load_kd_tree, save_kd_tree

src/pyco_tree/pico_tree/_pyco_tree/core.hpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,14 @@ struct array_layout {
5555
index_inner(row_major ? 1 : 0),
5656
index_outer(row_major ? 0 : 1) {}
5757

58+
inline py::ssize_t inner_stride() const { return info.shape[index_inner]; }
59+
60+
inline py::ssize_t outer_stride() const { return info.shape[index_outer]; }
61+
5862
py::buffer_info info;
5963
bool row_major;
60-
py::ssize_t index_inner;
61-
py::ssize_t index_outer;
64+
std::size_t index_inner;
65+
std::size_t index_outer;
6266
};
6367

6468
template <pico_tree::size_t Dim_>

0 commit comments

Comments
 (0)