Skip to content

Commit 230377d

Browse files
committed
Pattern Analysis
Adds functionality to analyze the minimum and maximum number of characters a regex may match.
1 parent ac7cb6f commit 230377d

File tree

3 files changed

+250
-4
lines changed

3 files changed

+250
-4
lines changed

include/ctre/evaluation.hpp

+242
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,248 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c
447447
// property matching
448448

449449

450+
// pattern analysis - computes the minimum and maximum number of characters a string must contain for a regex to match it
451+
// size_t(-1) denotes INF (unbounded), size_t(-2) denotes a finite count that is too large to store; all other values are exact counts
452+
// Adds two character-count limits without ever wrapping around.
//   - if either operand is the INF sentinel (size_t(-1)) the result is INF
//   - if the exact sum overflows, or happens to equal the INF sentinel, the
//     result is the "finite but too large" sentinel (size_t(-2))
//   - otherwise the result is the exact sum
constexpr CTRE_FORCE_INLINE size_t saturate_limit(const size_t& lhs, const size_t& rhs) {
	constexpr size_t unbounded = size_t{ 0 } - 1;
	constexpr size_t too_large = size_t{ 0 } - 2;

	if (lhs == unbounded) {
		return unbounded;
	}
	if (rhs == unbounded) {
		return unbounded;
	}

	const size_t sum = lhs + rhs;
	// unsigned wrap-around shows up as a sum smaller than one of its operands
	const bool wrapped = sum < lhs;
	if (wrapped || sum == unbounded) {
		return too_large;
	}
	return sum;
}
464+
465+
// Multiplies two character-count limits without ever wrapping around.
//   - if either operand is the INF sentinel (size_t(-1)) the result is INF
//     (this check comes first, so INF * 0 is INF)
//   - otherwise, if either operand is 0 the result is 0
//   - if the exact product overflows, or happens to equal the INF sentinel,
//     the result is the "finite but too large" sentinel (size_t(-2))
//   - otherwise the result is the exact product
constexpr CTRE_FORCE_INLINE size_t mult_saturate_limit(const size_t& lhs, const size_t& rhs) {
	constexpr size_t unbounded = size_t{ 0 } - 1;
	constexpr size_t too_large = size_t{ 0 } - 2;

	if (lhs == unbounded || rhs == unbounded) {
		return unbounded;
	}
	if (lhs == 0 || rhs == 0) {
		return 0;
	}
	// rhs is non-zero here, so the division is safe; this detects overflow up front
	if (lhs > (SIZE_MAX / rhs)) {
		return too_large;
	}

	const size_t product = lhs * rhs;
	return product == unbounded ? too_large : product;
}
481+
//a custom std::pair to overload some handy operations that we'll perform w/ a fold
482+
struct analysis_results : std::pair<size_t, size_t> {
483+
constexpr inline CTRE_FORCE_INLINE operator bool() const noexcept {
484+
return first;
485+
}
486+
constexpr auto CTRE_FORCE_INLINE operator+(analysis_results other) const noexcept {
487+
return analysis_results{std::make_pair(
488+
saturate_limit(first, other.first),
489+
saturate_limit(second, other.second)
490+
)};
491+
}
492+
constexpr auto CTRE_FORCE_INLINE operator||(analysis_results other) const noexcept {
493+
return analysis_results{std::make_pair(
494+
std::min(first, other.first),
495+
std::max(second, other.second)
496+
)};
497+
}
498+
};
499+
500+
template <typename Pattern>
501+
static constexpr auto trampoline_analysis(Pattern) noexcept;
502+
503+
template <typename... Patterns>
504+
static constexpr auto trampoline_analysis(ctll::list<Patterns...>) noexcept;
505+
506+
template<typename T, typename R>
507+
static constexpr auto trampoline_analysis(T, R captures) noexcept;
508+
509+
//processing for each type
510+
511+
//repeat
512+
template<size_t A, size_t B, typename R, typename... Content>
513+
static constexpr auto _analyze(repeat<A,B,Content...>, R captures) noexcept {
514+
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
515+
if constexpr (sizeof...(Content)) {
516+
ret = trampoline_analysis(ctll::list<Content...>(), captures);
517+
ret.first = mult_saturate_limit(ret.first, A);
518+
ret.second = mult_saturate_limit(ret.second, B);
519+
}
520+
return ret;
521+
}
522+
523+
//note: all * ? + operations are specialized variations of repeat {A,B}
524+
//lazy_repeat
525+
template<size_t A, size_t B, typename R, typename... Content>
526+
static constexpr auto _analyze(lazy_repeat<A, B, Content...>, R captures) noexcept {
527+
return _analyze(repeat<A, B, Content...>(), captures);
528+
}
529+
530+
//possessive_repeat
531+
template<size_t A, size_t B, typename R, typename... Content>
532+
static constexpr auto _analyze(possessive_repeat<A, B, Content...>, R captures) noexcept {
533+
return _analyze(repeat<A, B, Content...>(), captures);
534+
}
535+
536+
//star
537+
template<typename R, typename... Content>
538+
static constexpr auto _analyze(star<Content...>, R captures) noexcept {
539+
return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures);
540+
}
541+
542+
//lazy_star
543+
template<typename R, typename... Content>
544+
static constexpr auto _analyze(lazy_star<Content...>, R captures) noexcept {
545+
return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures);
546+
}
547+
548+
//possessive_star
549+
template<typename R, typename... Content>
550+
static constexpr auto _analyze(possessive_star<Content...>, R captures) noexcept {
551+
return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures);
552+
}
553+
554+
//plus
555+
template<typename R, typename... Content>
556+
static constexpr auto _analyze(plus<Content...>, R captures) noexcept {
557+
return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures);
558+
}
559+
560+
//lazy_plus
561+
template<typename R, typename... Content>
562+
static constexpr auto _analyze(lazy_star<Content...>, R captures) noexcept {
563+
return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures);
564+
}
565+
566+
//possessive_plus
567+
template<typename R, typename... Content>
568+
static constexpr auto _analyze(possessive_star<Content...>, R captures) noexcept {
569+
return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures);
570+
}
571+
572+
//optional
573+
template<typename R, typename... Content>
574+
static constexpr auto _analyze(optional<Content...>, R captures) noexcept {
575+
return _analyze(repeat<0ULL, 1ULL, Content...>(), captures);
576+
}
577+
578+
//lazy_optional
579+
template<typename R, typename... Content>
580+
static constexpr auto _analyze(lazy_optional<Content...>, R captures) noexcept {
581+
return _analyze(repeat<0ULL, 1ULL, Content...>(), captures);
582+
}
583+
584+
//back_reference
585+
template<size_t Id, typename R>
586+
static constexpr auto _analyze(back_reference<Id>, R captures) noexcept {
587+
const auto ref = captures.template get<Id>();
588+
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
589+
if constexpr (size(ref.get_expression())) {
590+
ret = trampoline_analysis(ref.get_expression(), captures);
591+
}
592+
return ret;
593+
}
594+
595+
//back_reference_with_name
596+
template<typename Name, typename R>
597+
static constexpr auto _analyze(back_reference_with_name<Name>, R captures) noexcept {
598+
const auto ref = captures.template get<Name>();
599+
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
600+
if constexpr (size(ref.get_expression())) {
601+
ret = trampoline_analysis(ref.get_expression(), captures);
602+
}
603+
return ret;
604+
}
605+
606+
//select, this is specialized, we need to take the minimum of all minimums and maximum of all maximums
607+
template<typename R, typename... Content>
608+
static constexpr auto _analyze(select<Content...>, R captures) noexcept {
609+
analysis_results ret = trampoline_select_analysis(ctll::list<Content...>(), captures);
610+
return ret;
611+
}
612+
613+
//character, any character contributes exactly one to both counts
614+
template<auto C, typename R>
615+
static constexpr auto _analyze(character<C>, R captures) noexcept {
616+
analysis_results ret{ std::make_pair(1ULL, 1ULL) };
617+
return ret;
618+
}
619+
620+
//strings, any string contributes the # of characters it contains (if we have an empty string that'll be 0)
621+
template<auto... Str, typename R>
622+
static constexpr auto _analyze(string<Str...>, R captures) noexcept {
623+
analysis_results ret{ std::make_pair(sizeof...(Str), sizeof...(Str)) };
624+
return ret;
625+
}
626+
627+
//we'll process anything that has contents as a regex
628+
//ctll::list
629+
template<typename R, typename... Content>
630+
static constexpr auto _analyze(ctll::list<Content...>,R captures) noexcept {
631+
analysis_results ret = trampoline_analysis(ctll::list<Content...>(), captures);
632+
return ret;
633+
}
634+
635+
//sequence
636+
template<typename R, typename... Content>
637+
static constexpr auto _analyze(sequence<Content...>, R captures) noexcept {
638+
analysis_results ret = trampoline_analysis(ctll::list<Content...>(), captures);
639+
return ret;
640+
}
641+
642+
//capture
643+
template<size_t Id, typename R, typename... Content>
644+
static constexpr auto _analyze(capture<Id, Content...>, R captures) noexcept {
645+
analysis_results ret = trampoline_analysis(ctll::list<Content...>(), captures);
646+
return ret;
647+
}
648+
649+
//capture_with_name
650+
template<size_t Id, typename Name, typename R, typename... Content>
651+
static constexpr auto _analyze(capture_with_name<Id, Name, Content...>, R captures) noexcept {
652+
analysis_results ret = trampoline_analysis(ctll::list<Content...>(), captures);
653+
return ret;
654+
}
655+
656+
//everything else, anything we haven't matched already isn't supported and will contribute 0
657+
template<typename T, typename R>
658+
static constexpr auto _analyze(T, R captures) noexcept {
659+
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
660+
return ret;
661+
}
662+
//note: ctll::list wraps patterns just like sequences, we'll treat anything that looks like a regex w/ ctll::list
663+
template <typename... Patterns, typename R>
664+
static constexpr auto trampoline_analysis(ctll::list<Patterns...>, R captures) noexcept {
665+
//fold, for every argument in a ctll::list, calculate its contribution to the limits
666+
auto r = ((_analyze(Patterns(), captures)) + ...);
667+
//note any reordering of parameters will result in the same limits
668+
return r;
669+
}
670+
671+
template <typename... Patterns, typename R>
672+
static constexpr auto trampoline_select_analysis(ctll::list<Patterns...>, R captures) noexcept {
673+
//fold, each argument in a selection of regexes we take the minimum and maximum of all values
674+
auto r = ((trampoline_analysis(Patterns(), captures)) || ...);
675+
//note again, order is unimportant
676+
return r;
677+
}
678+
679+
template <typename... Patterns>
680+
static constexpr auto pattern_analysis(ctll::list<Patterns...>) noexcept {
681+
using return_type = decltype(regex_results(std::declval<std::basic_string_view<char>::iterator>(), find_captures(pattern)));
682+
return trampoline_analysis(ctll::list<Patterns...>(), return_type{});
683+
}
684+
685+
template <typename Pattern = empty>
686+
static constexpr auto pattern_analysis(Pattern pattern = {}) noexcept {
687+
using return_type = decltype(regex_results(std::declval<std::basic_string_view<char>::iterator>(), find_captures(pattern)));
688+
return trampoline_analysis(ctll::list<Pattern>(), return_type{});
689+
}
690+
691+
450692
}
451693

452694
#endif

include/ctre/find_captures.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,12 @@ template <typename... Content, typename... Tail, typename Output> constexpr auto
112112

113113

114114
template <size_t Id, typename... Content, typename... Tail, typename... Output> constexpr auto find_captures(ctll::list<capture<Id,Content...>, Tail...>, ctll::list<Output...>) noexcept {
115-
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id>>());
115+
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, void, ctll::list<Content...>>>());
116116
}
117117

118118

119119
template <size_t Id, typename Name, typename... Content, typename... Tail, typename... Output> constexpr auto find_captures(ctll::list<capture_with_name<Id,Name,Content...>, Tail...>, ctll::list<Output...>) noexcept {
120-
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, Name>>());
120+
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, Name, ctll::list<Content...>>>());
121121
}
122122

123123

include/ctre/return_type.hpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ struct not_matched_tag_t { };
1313

1414
static constexpr inline auto not_matched = not_matched_tag_t{};
1515

16-
template <size_t Id, typename Name = void> struct captured_content {
16+
template <size_t Id, typename Name = void, typename Content = void> struct captured_content {
1717
template <typename Iterator> class storage {
1818
Iterator _begin{};
1919
Iterator _end{};
2020

2121
bool _matched{false};
2222
public:
2323
using char_type = typename std::iterator_traits<Iterator>::value_type;
24-
24+
using content_type = Content;
2525
using name = Name;
2626

2727
constexpr CTRE_FORCE_INLINE storage() noexcept {}
@@ -86,6 +86,10 @@ template <size_t Id, typename Name = void> struct captured_content {
8686
constexpr CTRE_FORCE_INLINE static size_t get_id() noexcept {
8787
return Id;
8888
}
89+
90+
constexpr CTRE_FORCE_INLINE static content_type get_expression() noexcept {
91+
return {};
92+
}
8993
};
9094
};
9195

0 commit comments

Comments
 (0)