Skip to content

Pattern Analysis #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 250 additions & 0 deletions include/ctre/evaluation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@

namespace ctre {

struct analysis_results;
template<typename T = ctll::list<>, typename R = ctll::list<>>
static constexpr analysis_results trampoline_analysis(T, R captures) noexcept;

// calling with pattern prepare stack and triplet of iterators
template <typename Iterator, typename EndIterator, typename Pattern>
constexpr inline auto match_re(const Iterator begin, const EndIterator end, Pattern pattern) noexcept {
Expand Down Expand Up @@ -447,6 +451,252 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c
// property matching


// pattern analysis - returns the minimum and maximum # of characters in order for a regex to match a string
// a custom std::pair to overload some handy operations that we'll perform w/ a fold
struct analysis_results : std::pair<size_t, size_t> {
// -1 is considered INF, -2 is finite (but perhaps too large to store), all other values are exact counts
static constexpr CTRE_FORCE_INLINE size_t saturate_limit(const size_t& lhs, const size_t& rhs) {
const constexpr size_t inf = size_t{ 0 } -1;
const constexpr size_t lim = size_t{ 0 } -2;
size_t ret = inf;
if (lhs == inf || rhs == inf) {
return ret;
}
else {
ret = lhs + rhs;
ret = ret < lhs ? lim : ret == inf ? lim : ret;
}
return ret;
}

static constexpr CTRE_FORCE_INLINE size_t mult_saturate_limit(const size_t& lhs, const size_t& rhs) {
const constexpr size_t inf = size_t{ 0 } -1;
const constexpr size_t lim = size_t{ 0 } -2;
size_t ret = inf;
if (lhs == inf || rhs == inf) {
return ret;
}
else if (lhs == 0 || rhs == 0) {
return ret = 0;
}
else {
if (lhs > (SIZE_MAX / rhs))
return ret = lim;
ret = lhs * rhs;
ret = ret == inf ? lim : ret;
return ret;
}
}

constexpr inline CTRE_FORCE_INLINE operator bool() const noexcept {
return first > 0;
}
friend constexpr auto CTRE_FORCE_INLINE operator+(const analysis_results &lhs, const analysis_results &other) noexcept {
return analysis_results{ std::make_pair(
saturate_limit(lhs.first, other.first),
saturate_limit(lhs.second, other.second)
) };
}
friend constexpr auto CTRE_FORCE_INLINE operator||(const analysis_results& lhs, const analysis_results& other) noexcept {
return analysis_results{ std::make_pair(
std::min(lhs.first, other.first),
std::max(lhs.second, other.second)
)};
}
constexpr analysis_results& operator =(const analysis_results& rhs) {
first = rhs.first;
second = rhs.second;
return *this;
}
};

template <typename... Patterns>
static constexpr auto pattern_analysis(ctll::list<Patterns...>) noexcept;

template <typename Pattern = empty>
static constexpr auto pattern_analysis(Pattern pattern = {}) noexcept;

//processing for each type
//repeat
template<size_t A, size_t B, typename R, typename... Content>
static constexpr analysis_results _analyze(repeat<A, B, Content...>, R captures) noexcept {
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
if constexpr (sizeof...(Content)) {
ret = trampoline_analysis(ctll::list<Content...>(), captures);
ret.first = analysis_results::mult_saturate_limit(ret.first, A);
ret.second = analysis_results::mult_saturate_limit(ret.second, B);
}
return ret;
}

//note: all * ? + operations are specialized variations of repeat {A,B}
//lazy_repeat
template<size_t A, size_t B, typename R, typename... Content>
static constexpr analysis_results _analyze(lazy_repeat<A, B, Content...>, R captures) noexcept {
return _analyze(repeat<A, B, Content...>(), captures);
}

//possessive_repeat
template<size_t A, size_t B, typename R, typename... Content>
static constexpr analysis_results _analyze(possessive_repeat<A, B, Content...>, R captures) noexcept {
return _analyze(repeat<A, B, Content...>(), captures);
}

//star
template<typename R, typename... Content>
static constexpr analysis_results _analyze(star<Content...>, R captures) noexcept {
return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures);
}

//lazy_star
template<typename R, typename... Content>
static constexpr analysis_results _analyze(lazy_star<Content...>, R captures) noexcept {
return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures);
}

//possessive_star
template<typename R, typename... Content>
static constexpr analysis_results _analyze(possessive_star<Content...>, R captures) noexcept {
return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures);
}

//plus
template<typename R, typename... Content>
static constexpr analysis_results _analyze(plus<Content...>, R captures) noexcept {
return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures);
}

//lazy_plus
template<typename R, typename... Content>
static constexpr analysis_results _analyze(lazy_plus<Content...>, R captures) noexcept {
return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures);
}

//possessive_plus
template<typename R, typename... Content>
static constexpr analysis_results _analyze(possessive_plus<Content...>, R captures) noexcept {
return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures);
}

//optional
template<typename R, typename... Content>
static constexpr analysis_results _analyze(optional<Content...>, R captures) noexcept {
return _analyze(repeat<0ULL, 1ULL, Content...>(), captures);
}

//lazy_optional
template<typename R, typename... Content>
static constexpr analysis_results _analyze(lazy_optional<Content...>, R captures) noexcept {
return _analyze(repeat<0ULL, 1ULL, Content...>(), captures);
}

//back_reference
template<size_t Id, typename R>
static constexpr analysis_results _analyze(back_reference<Id>, R captures) noexcept {
const auto ref = captures.template get<Id>();
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
if constexpr (size(ref.get_expression())) {
ret = trampoline_analysis(ref.get_expression(), captures);
}
return ret;
}

//back_reference_with_name
template<typename Name, typename R>
static constexpr analysis_results _analyze(back_reference_with_name<Name>, R captures) noexcept {
const auto ref = captures.template get<Name>();
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
if constexpr (size(ref.get_expression())) {
ret = trampoline_analysis(ref.get_expression(), captures);
}
return ret;
}

//CharacterLike, anything that's like a character contributes 1 to both counts
template <typename R, typename CharacterLike, typename = std::enable_if_t<(MatchesCharacter<CharacterLike>::template value<decltype(*std::declval<std::basic_string_view<char>::iterator>())>)>>
static constexpr analysis_results _analyze(CharacterLike, R captures) {
analysis_results ret{ std::make_pair(1ULL, 1ULL) };
return ret;
}

//strings, any string contributes the # of characters it contains (if we have an empty string that'll be 0)
template<auto... Str, typename R>
static constexpr analysis_results _analyze(string<Str...>, R captures) noexcept {
analysis_results ret{ std::make_pair(sizeof...(Str), sizeof...(Str)) };
return ret;
}

//we'll process anything that has contents as a regex
//ctll::list
template<typename R, typename... Content>
static constexpr analysis_results _analyze(ctll::list<Content...>, R captures) noexcept {
analysis_results ret{ trampoline_analysis(ctll::list<Content...>(), captures) };
return ret;
}

//sequence
template<typename R, typename... Content>
static constexpr analysis_results _analyze(sequence<Content...>, R captures) noexcept {
analysis_results ret{ trampoline_analysis(ctll::list<Content...>(), captures) };
return ret;
}

//capture
template<size_t Id, typename R, typename... Content>
static constexpr analysis_results _analyze(capture<Id, Content...>, R captures) noexcept {
analysis_results ret{ trampoline_analysis(ctll::list<Content...>(), captures) };
return ret;
}

//capture_with_name
template<size_t Id, typename Name, typename R, typename... Content>
static constexpr analysis_results _analyze(capture_with_name<Id, Name, Content...>, R captures) noexcept {
analysis_results ret{ trampoline_analysis(ctll::list<Content...>(), captures) };
return ret;
}

//everything else, anything we haven't matched already isn't supported and will contribute 0
template<typename T, typename R, typename = std::enable_if_t<(!MatchesCharacter<T>::template value<decltype(*std::declval<std::basic_string_view<char>::iterator>())>)>>
static constexpr analysis_results _analyze(T, R captures) noexcept {
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
return ret;
}

//select, this is specialized, we need to take the minimum of all minimums and maximum of all maximums
template<typename R, typename... Content>
static constexpr analysis_results _analyze(select<Content...>, R captures) noexcept {
analysis_results ret = ((trampoline_analysis(Content(), captures)) || ...);
return ret;
}

//note: ctll::list wraps patterns just like sequences, we'll treat anything that looks like a regex w/ ctll::list
template <typename... Patterns, typename R>
static constexpr analysis_results trampoline_analysis(ctll::list<Patterns...>, R captures) noexcept {
//fold, for every argument in a ctll::list, calculate its contribution to the limits
analysis_results ret = ((_analyze(Patterns(), captures)) + ...);
//note any reordering of parameters will result in the same limits
return ret;
}

template <typename Pattern, typename R>
static constexpr analysis_results trampoline_analysis(Pattern pattern, R captures) noexcept {
//some individual type, we can immediately analyze it
analysis_results ret = _analyze(pattern, captures);
return ret;
}

template <typename... Patterns>
static constexpr auto pattern_analysis(ctll::list<Patterns...>) noexcept {
using return_type = decltype(regex_results(std::declval<std::basic_string_view<char>::iterator>(), find_captures(pattern)));
return trampoline_analysis(ctll::list<Patterns...>(), return_type{});
}

template <typename Pattern = empty>
static constexpr auto pattern_analysis(Pattern pattern) noexcept {
using return_type = decltype(regex_results(std::declval<std::basic_string_view<char>::iterator>(), find_captures(pattern)));
return trampoline_analysis(ctll::list<Pattern>(), return_type{});
}

}

#endif
4 changes: 2 additions & 2 deletions include/ctre/find_captures.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ template <typename... Content, typename... Tail, typename Output> constexpr auto


template <size_t Id, typename... Content, typename... Tail, typename... Output> constexpr auto find_captures(ctll::list<capture<Id,Content...>, Tail...>, ctll::list<Output...>) noexcept {
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id>>());
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, void, ctll::list<Content...>>>());
}


template <size_t Id, typename Name, typename... Content, typename... Tail, typename... Output> constexpr auto find_captures(ctll::list<capture_with_name<Id,Name,Content...>, Tail...>, ctll::list<Output...>) noexcept {
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, Name>>());
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, Name, ctll::list<Content...>>>());
}


Expand Down
2 changes: 1 addition & 1 deletion include/ctre/return_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,4 +351,4 @@ namespace std {
#endif
#endif

#endif
#endif