-
Notifications
You must be signed in to change notification settings - Fork 228
/
Copy pathluanma.pl
1565 lines (1401 loc) · 47.7 KB
/
luanma.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
use strict; use warnings;
$INC{'Encode/ConfigLocal.pm'}=1;
use Encode;
eval { local $SIG{__DIE__}; # silence weechat die handler
require Encode::HanExtra }; # more chinese
eval { local $SIG{__DIE__};
require Encode::JIS2K }; # more japanese
use Time::Local;
# luanma.pl is written by Nei <anti.teamidiot.de>
# and licensed under the GNU General Public License v3
# or any later version
# to read the following docs, you can use "perldoc luanma.pl"
=head1 NAME
luanma - store more info about encoding of message, and change it with update (weechat edition)
=head1 SYNOPSIS
more help for charset troubles
command is called /lma
see "/help lma" for usage
=head1 DESCRIPTION
luanma will allow you to view received messages as they would appear
when decoded using different charsets. you might know this feature
from your webbrowser. it is useful if you need to understand a message
that was received, but it looks garbled because the sender used
different charset than you.
as usual, a list of charsets can be defined that will be tried in
consecution until a successful decode is made.
the charset can be chosen differently for different times in the past
and based on nick and weechat buffer. furthermore, you can use /debug
tags to see which charset was used to decode a message.
a table of charset rules will be saved to luanma.conf and can be
edited with /lma set command.
=head1 CAVEATS
=over
=item *
the automatic encoding of outgoing notices is visible on display and
no assessment of success is given, because weechat does not set the
appropriate tag on outgoing notices
=item *
in order to not convert all messages as raw, only high bit data
(extended ascii) is encoded. that means the script works fine for
latin variants and utf8, but B<not> for any 7bit-clean encoding or for
national EBCDIC
=item *
colours might get mixed up with colorize_nicks script when there is a
nick with the same name as a 2-hex-character encoding (example:
"b2"). One possible workaround is to turn off greedy_matching in
colorize_nicks
=item *
no encoding is done when outgoing charset is specified as 'utf8'
=back
=head1 BUGS
=over
=item *
splitting of messages is not supported, so if byte-length of message
in utf8 exceeds 510, it will get split by weechat. result is that only
the first part is encoded properly.
if byte-length of B<encoded> message exceeds 510, then it will usually
get cut off (exact behaviour depends on IRC server)
=item *
the prefix on ACTION messages ('/me') gets recoded for messages that
you send yourself. this will cause problems with utf8 nicknames where
supported
=back
=head1 SETTINGS
the settings are usually found in the
plugins.var.perl.luanma
namespace, that is, type
/set plugins.var.perl.luanma.*
to see them and
/set plugins.var.perl.luanma.SETTINGNAME VALUE
to change a setting C<SETTINGNAME> to a new value C<VALUE>. Finally,
/unset plugins.var.perl.luanma.SETTINGNAME
will reset a setting to its default value.
the following settings are available:
=head2 tags
white-space separated list of irc_(in_) tags to store raw messages of
(only those can be recoded). see /debug tags
=head2 encode_warn
add a warning message into the line displayed on your buffer, when
encoding of outgoing messages fails/is lossy
=head2 parser
parser to use for line parsing. valid options: ondemand, async,
full. ondemand will parse lines when displayed on screen (needs parse
on every buffer switch, but fast on load). async and full do not need
to parse lines when switching buffers, but WILL FREEZE your weechat on
/script (re)load and /upgrade. be careful.
async uses timers to do the parsing which should make it less likely
for you to drop network connection. full will do the parse in one
swipe, so it is faster and the freeze is of shorter duration.
=cut
# Script name: used for registration, for the config file name and for the
# plugins.var.perl.<name> option namespace.
use constant SCRIPT_NAME => 'luanma';
# Register the script with weechat; stop loading the file if registration
# returns a false value.
# NOTE(review): 'stop_luanma' is presumably the shutdown callback name -- it is
# not defined in this part of the file, confirm against the weechat script API.
weechat::register(SCRIPT_NAME, 'Nei <anti.teamidiot.de>', '0.3', 'GPL3',
'more flexibility with incoming charset', 'stop_luanma', '') || return;
## SCRIPT_FILE -- look up the file this script was loaded from
## queries the 'perl_script' infolist for the entry named SCRIPT_NAME
## returns the 'filename' string of that entry, or undef if there is no entry
sub SCRIPT_FILE() {
    my $infolistptr = weechat::infolist_get('perl_script', '', SCRIPT_NAME);
    # declare first, assign conditionally: "my $x = ... if COND" has
    # undefined behaviour in perl and must not be relied upon
    my $filename;
    $filename = weechat::infolist_string($infolistptr, 'filename')
        if weechat::infolist_next($infolistptr);
    weechat::infolist_free($infolistptr);
    return $filename unless @_;
}
{
package Nlib;
# this is a weechat perl library
use strict; use warnings; no warnings 'redefine';
## hdh -- hdata helper
## walks a chain of hdata variables, starting at a pointer or at a named hdata list
## $_[0] - arg pointer or hdata list name
## $_[1] - hdata name
## $_[2..$#_] - hdata variable name
## $_[-1] - hashref with key/value to update (optional)
## returns value of hdata, and hdata name in list ctx, or number of variables updated
sub hdh {
# first argument is neither a 0x... pointer nor a plain number: treat it as the
# name of an hdata list and put the pointer of that list in front instead
if (@_ > 1 && $_[0] !~ /^0x/ && $_[0] !~ /^\d+$/) {
my $arg = shift;
unshift @_, weechat::hdata_get_list(weechat::hdata_get($_[0]), $arg);
}
# each round consumes (pointer, hdata name, variable) from the front of @_
# and pushes the result back to the front, so the next variable continues from there
while (@_ > 2) {
my ($arg, $name, $var) = splice @_, 0, 3;
my $hdata = weechat::hdata_get($name);
unless (ref $var eq 'HASH') {
# "!prop" is replaced by the hdata string property of that name
$var =~ s/!(.*)/weechat::hdata_get_string($hdata, $1)/e;
# strip a leading "N|" array index for the type lookup
(my $plain_var = $var) =~ s/^\d+\|//;
my $type = weechat::hdata_get_var_type_string($hdata, $plain_var);
if ($type eq 'pointer') {
# remember which hdata the pointer refers to, it becomes the hdata name of the next round
my $name = weechat::hdata_get_var_hdata($hdata, $var);
unshift @_, $name if $name;
}
# call the accessor matching the variable type, i.e. weechat::hdata_<type>
my $fn = "weechat::hdata_$type";
unshift @_, do { no strict 'refs';
&$fn($hdata, $arg, $var) };
}
else {
# a hashref in variable position requests an update of the current hdata
return weechat::hdata_update($hdata, $arg, $var);
}
}
wantarray ? @_ : $_[0]
}
## hook_dynamic -- weechat::hook something and store hook reference
## the hook is stored in %DYNAMIC_HOOKS of the calling package, keyed by event
## type and subroutine name; nothing is hooked if that entry already exists
## $hook_call - hook type (e.g. modifier)
## $what - event type to hook (depends on $hook_call)
## $sub - subroutine name to install
## @params - parameters
## dies with the eval error if the generated code fails
sub hook_dynamic {
my ($hook_call, $what, $sub, @params) = @_;
my $caller_package = (caller)[0];
# string eval: only $caller_package and $hook_call are interpolated into the
# code text; the backslash-escaped variables are resolved when the code runs
eval qq{
package $caller_package;
no strict 'vars';
\$DYNAMIC_HOOKS{\$what}{\$sub} =
weechat::hook_$hook_call(\$what, \$sub, \@params)
unless exists \$DYNAMIC_HOOKS{\$what} &&
exists \$DYNAMIC_HOOKS{\$what}{\$sub};
};
die $@ if $@;
}
## unhook_dynamic -- weechat::unhook something where hook reference has been stored with hook_dynamic
## removes the entry from %DYNAMIC_HOOKS of the calling package and drops the
## event type key once no subroutine is left under it
## $what - event type that was hooked
## $sub - subroutine name that was installed
## dies with the eval error if the generated code fails
sub unhook_dynamic {
my ($what, $sub) = @_;
my $caller_package = (caller)[0];
# string eval so that %DYNAMIC_HOOKS resolves in the caller's package; only
# $caller_package is interpolated into the code text
eval qq{
package $caller_package;
no strict 'vars';
weechat::unhook(\$DYNAMIC_HOOKS{\$what}{\$sub})
if exists \$DYNAMIC_HOOKS{\$what} &&
exists \$DYNAMIC_HOOKS{\$what}{\$sub};
delete \$DYNAMIC_HOOKS{\$what}{\$sub};
delete \$DYNAMIC_HOOKS{\$what} unless \%{\$DYNAMIC_HOOKS{\$what}};
};
die $@ if $@;
}
## fu8on -- force the utf8 flag on for all arguments
## @_ - strings to flag (changed in place)
## returns all arguments in list ctx, the first argument otherwise
sub fu8on(@) {
    foreach my $str (@_) {
        Encode::_utf8_on($str);
    }
    return @_ if wantarray;
    return $_[0];
}
use Pod::Select qw();
use Pod::Simple::TextContent;
## get_desc_from_pod -- return setting description from pod documentation
## $file - filename with pod
## $setting - name of setting
## returns description as text; undef if the SETTINGS/$setting section yields
## no text, nothing if $file is missing or empty
sub get_desc_from_pod {
    my ($file, $setting) = @_;
    return unless -s $file;
    # collect the selected pod section in an in-memory file
    open my $pod_sel, '>', \my $ss or return;
    Pod::Select::podselect({
        -output => $pod_sel,
        -sections => ["SETTINGS/$setting"]}, $file);
    my $res;
    # nothing selected leaves the buffer undefined; skip parsing in that case
    if (defined $ss && length $ss) {
        my $pt = Pod::Simple::TextContent->new;
        $pt->output_string(\my $ss_f);
        $pt->parse_string_document($ss);
        # the plain text starts with the heading (the setting name), the
        # description is what follows on that line
        ($res) = $ss_f =~ /^\s*\Q$setting\E\s+(.*)\s*/ if defined $ss_f;
    }
    $res
}
## get_settings_from_pod -- collect the names of all settings documented in pod
## $file - file with pod
## the subsections of the SETTINGS section are selected and the text of every
## =head2 line in them is extracted
## returns list of all settings (nothing if $file is missing or empty)
sub get_settings_from_pod {
    my ($pod_file) = @_;
    -s $pod_file or return;
    my $selected;
    open my $out_fh, '>', \$selected;
    Pod::Select::podselect(
        { -output => $out_fh, -sections => ["SETTINGS//!.+"] },
        $pod_file);
    $selected =~ /^=head2\s+(.*)\s*$/mg
}
1
}
# name of the weechat command provided by this script
use constant CMD_NAME => 'lma';
our @nags;
# pattern matched (case-insensitively) against the tags of new lines in line_sig
our $nag_tag;
# auto_encode_mod is only active while the 'privmsg' key exists in here
our %nag_modifiers;
# config directory, falling back to 'weechat_dir' when 'weechat_config_dir' yields nothing
our $weechat_dir = weechat::info_get('weechat_config_dir', '');
$weechat_dir = weechat::info_get('weechat_dir', '') if (!$weechat_dir);
our $CFG_FILE_NAME = $weechat_dir.weechat::info_get('dir_separator', '').SCRIPT_NAME.'.conf';
# decode rules as configured (@CFG_TABLE) and the sorted lookup table with
# compiled patterns that update_table2 derives from them (@CFG_TABLE_2)
our (@CFG_TABLE, @CFG_TABLE_2);
# per-line state, keyed by line pointer; @STO holds references to all of these hashes
our @STO = (\(our (%BYTE_MSGS, %ESC_MSG, %MSG_TIME, %MSG_BUF, %MSG_NICK, %MSG_ENC, %MSG_FLT, %MSG_COLOR)));
# output encoding rules as configured (@ENCODE_TABLE) and the sorted lookup
# table that update_enc_table derives from them (@ENCODE_TABLE2)
our (@ENCODE_TABLE, @ENCODE_TABLE2);
# charset name => object providing ->encode and ->decode
our %DEC;
# line_sig counts stored lines in $GC_COUNT and runs gc_lines once it exceeds $GC_LIMIT
our $GC_COUNT;
our $GC_LIMIT = 10_000;
# NOTE(review): the meaning of 987 is not visible in this part of the file
our $PARSE_STATS = 987;
our $ASYNC_PARSE = $PARSE_STATS;
# while $ASYNC_TIMER is set, line_sig records the pointers of new lines in %ASYNC_BUF
our %ASYNC_BUF;
our $ASYNC_TIMER;
# month abbreviations, their 0-based index, and an alternation matching any of them
our @mon = qw(jan feb mar apr may jun jul aug sep oct nov dec);
our %mon = do { my $i = 0; map { $_ => $i++ } @mon };
our $mon_re = join '|', @mon;
## esc1 -- escape all endangered characters
## @_ - strings to modify (changed in place)
## every character outside the safe set is replaced by its hexadecimal code
## point, bracketed by \020 on both sides
sub esc1 {
    foreach my $text (@_) {
        # need to fix up escape here, weechat kills it
        # see grep { weechat::string_remove_color(chr $_, "") ne chr $_ } (000..0177)
        # our escape bracket is 020
        $text =~ s{([^\000-\017\021-\030\035-\175\177])}{"\020" . sprintf('%x', ord $1) . "\020"}ge;
    }
}
## esc_only -- check that a message needs no recode
## $_[0] - message string to check
## returns true if the string contains no character outside the plain set
sub esc_only {
    my ($msg) = @_;
    return !($msg =~ /[^\000-\032\034-\175\177]/);
}
# initialise the script, then install the permanent hooks
init_luanma();
weechat::hook_config('plugins.var.perl.'.SCRIPT_NAME.'.*', 'default_options', '');
weechat::hook_signal('buffer_line_added', 'line_sig', '');
weechat::hook_signal('upgrade', 'restore_lines', '');
weechat::hook_modifier('input_text_for_buffer', 'auto_encode_mod', '');
# the /lma command: description, argument synopsis, help text and completion templates
weechat::hook_command(CMD_NAME, 'a better /charset',
(join ' || ', 'list',
'set <ts> <buffer> <nick> [<encodings...>] [-out <encoding>]',
'set -out <buffer> <pattern> <encoding>',
'del <ts> <buffer> <nick> [-g]',
'del -out <buffer> <pattern> [-g]',
'save',
'reload',
'list_rules',
'gc',
'forget -yes',
), (join "\n",
'without arguments, the list of keys is displayed',
'',
' list: show list of current recode rules',
' set: adds or modifies a recode rule',
' del: delete one or many recode rules',
' save: save rules to config file',
' reload: reload rules from config file',
'list_rules: list internal rules and pointers (for debug and /debug tags)',
' gc: remove raw lines from cache that are no longer valid in weechat (this is also done automatically)',
" forget: forget everything about messages, forget all raw and all ESC messages. be careful, /@{[CMD_NAME]} cannot be used anymore after you do this!",
'',
' ts: timestamp at which the rule starts to become effective',
' the following time specifications are supported:',
' 1357986420: unix timestamp as output by `date +%s\' (used by weechat internally)',
' -59 | -59m: relative time, 59 seconds/minutes /',
' -23h | -1d: 23 hours / 1 day ago',
' HH:MM:SS : time (hour:minutes:seconds)',
' Jan01 : date (format: MonDD)',
' Jan0100:00:00 = midnight on January 1st.',
' 1: from the beginning on',
' 0: starting from now',
' the "del" command additionally supports these specifiers:',
' *: any time',
' >ts: rule effective after ts',
' <ts: rule effective before ts',
' ts-ts: rule effective between ts1 and ts2',
'',
' buffer: buffer name in the format network.#channel or network.nick',
' * is allowed as wildcard',
'',
' nick: additionally nick tag to match',
' * is allowed as wildcard',
' if nick ends with "+", buffer is more important when selecting rule',
' when unclear about the order in which rules apply, you can verify with "list_rules"',
'',
' pattern: pattern which is matched on input line to decide automatic encoding',
' * is allowed as wildcard, use ** to break space boundaries',
'',
' -out: edit the output encoding list instead of decoding rules list',
'',
' -g: when used after the "del" command, wildcards are used on the settings list to mass-delete matching entries. otherwise, wildcards match the rule that has this exact wildcard',
'',
' encodings: list of whitespace separated encodings to try, in order, to decode incoming message',
' see `man Encode::Supported\' for a list of supported encodings',
' special encoding "x" means do not decode',
' an "!" can be added after utf8 to signify that partial decoding is acceptable',
' (for example invalid utf8 resulting by last character cut short)',
), (join ' || ', 'list %-',
'set %- %(buffers_names) %(nick)',
'del %- %(buffers_names) %(nick) -g %-',
'save %-',
'reload %-',
'list_rules %-',
'gc %-',
'forget %-',
), 'lma_cmd', '');
# weechat's own /save and /reload also reach the script via lma_wee_save
weechat::hook_command_run('/save', 'lma_wee_save', 'save');
weechat::hook_command_run('/reload', 'lma_wee_save', 'reload');
## irc_in_mod -- replace high bits (before charset decode)
## () - modifier handler
## $_[1] - modifier
## $_[3] - content
## only the part after the first " :" is escaped with esc1
## returns modified content, or the content as is when it has no " :"
sub irc_in_mod {
    my $raw = $_[3];
    my ($head, $sep, $trailing) = split /( :)/, $raw, 2;
    defined $trailing or return $raw;
    esc1($trailing);
    return join '', $head, $sep, $trailing;
}
## irc_out_mod -- encode utf8 to local charset on send
## () - modifier handler
## $_[1] - modifier
## $_[3] - content
## the part after the first " :" may start with one of these markers:
##   \02010\020         escaped \020, reduced to a single \020
##   \020<charset>\020  the rest is encoded with this charset from %DEC
## for PRIVMSG the charset marker is also accepted right after "\01ACTION "
## returns modified content, or the content as is when no marker is found
sub irc_out_mod {
    my ($head, $sep, $text) = split /( :)/, $_[3], 2;
    return $_[3] unless defined $text;
    Encode::_utf8_on($text);
    # longer charset names come first in the alternation
    my $codecs = join '|', map { quotemeta } sort { length $b <=> length $a } keys %DEC;
    unless ($text =~ s/^\02010\020/\020/) {
        if ($text =~ s/^\020($codecs)\020//) {
            # must make best effort here
            $text = $DEC{$1}->encode($text, Encode::FB_DEFAULT);
        }
        elsif ($head =~ /^PRIVMSG/ && $text =~ /^(\01ACTION )\020($codecs)\020(.*)(\01)$/) { # /me
            # avoid upgrade
            $text = "\01ACTION " . $DEC{$2}->encode($3, Encode::FB_DEFAULT) . "\01";
        }
        else {
            return $_[3];
        }
    }
    return "$head$sep$text";
}
## auto_encode_mod -- add encoding prefixes to buffer input line
## () - modifier handler
## $_[1] - modifier
## $_[2] - buffer pointer
## $_[3] - content of line before sending
## returns the text prefixed with \020<charset>\020 if the first matching entry
## of @ENCODE_TABLE2 asks for a charset other than utf8, the content as is otherwise
sub auto_encode_mod {
    # XXX should do the splitting
    my $line = $_[3];
    my $text = Nlib::fu8on(weechat::string_input_for_buffer($line));
    # pass through commands, input while privmsg handling is off, and
    # text that is already marked
    return $line
        if !$text
        || !exists $nag_modifiers{privmsg}
        || $text =~ /^\020/;
    my $buf_name = Nlib::hdh($_[2], 'buffer', 'name');
    my $rule;
    for my $cand (@ENCODE_TABLE2) {
        next unless $buf_name =~ $cand->{buf_re} && $text =~ $cand->{pat_re};
        $rule = $cand;
        last;
    }
    return $line unless $rule;
    my $charset = $rule->{_}{charset};
    return $line if $charset eq 'utf8'; # XXX
    return "\020$charset\020$text";
}
## auto_encode_cmd -- add tag to command for encode marker
## () - command_run handler
## $_[0] - forward to which command (selects how the command line is split:
##         me, msg, query, wallchops or topic)
## $_[1] - buffer pointer
## $_[2] - command
## returns WEECHAT_RC_OK to let the command run unchanged, or WEECHAT_RC_OK_EAT
## after running the command again with \020<charset>\020 put before its text
sub auto_encode_cmd {
    # XXX should do the splitting
    Encode::_utf8_on($_[2]);
    # $pre: command and target up to the text, $in: the text, $buf: name of the
    # buffer that the rules are matched against
    my ($pre, $in, $buf);
    if ($_[0] eq 'me' && $_[2] =~ /^(\S+\s)(.*)$/i) {
        ($pre, $in) = ($1, $2);
        $buf = weechat::buffer_get_string($_[1], 'name');
    }
    elsif ($_[0] eq 'msg' && $_[2] =~ /^(\S+(?:\s+-server\s+(\S+))?\s+(\S+) )(.*)$/i) {
        my ($srv, $targ) = ($2, $3);
        ($pre, $in) = ($1, $4);
        $srv //= weechat::buffer_get_string($_[1], 'localvar_server');
        $buf = $targ ne '*' ? "$srv.$targ" : weechat::buffer_get_string($_[1], 'name');
    }
    elsif ($_[0] eq 'query' && $_[2] =~ /^(\S+(?:\s+-server\s+(\S+))?\s+(\S+) )(\s*\S.*)$/i) {
        my ($srv, $targ) = ($2, $3);
        ($pre, $in) = ($1, $4);
        $srv //= weechat::buffer_get_string($_[1], 'localvar_server');
        $buf = "$srv.$targ";
    }
    elsif ($_[0] eq 'wallchops' && $_[2] =~ /^(\S+(?:\s+([#&]\S+))? )(.*)$/i) {
        my $targ = $2;
        ($pre, $in) = ($1, $3);
        $buf = $targ ? weechat::buffer_get_string($_[1], 'localvar_server').'.'.$targ :
            weechat::buffer_get_string($_[1], 'name');
    }
    # optional channel argument is "#" or "&" followed by the channel name, as
    # for wallchops (the former character class [#&\S+] matched one character only,
    # so the channel ended up behind the encode marker as part of the text)
    elsif ($_[0] eq 'topic' && $_[2] !~ /\s-delete\s*$/i && $_[2] =~ /^(\S+(?:\s+([#&]\S+))? )(.*)$/i) {
        my $targ = $2;
        ($pre, $in) = ($1, $3);
        $buf = $targ ? weechat::buffer_get_string($_[1], 'localvar_server').'.'.$targ :
            weechat::buffer_get_string($_[1], 'name');
    }
    else {
        return weechat::WEECHAT_RC_OK
    }
    return weechat::WEECHAT_RC_OK if $in =~ /^\020/; # already marked
    # first entry of the output table matching both buffer and text wins
    my ($r) =
        grep { $buf =~ $_->{buf_re} && $in =~ $_->{pat_re} } @ENCODE_TABLE2;
    return weechat::WEECHAT_RC_OK unless $r;
    return weechat::WEECHAT_RC_OK if $r->{_}{charset} eq 'utf8'; # XXX
    # run the marked command instead of the original one
    weechat::command($_[1], "$pre\020${$r}{_}{charset}\020$in");
    return weechat::WEECHAT_RC_OK_EAT
}
## find_rule -- find rule to recode this line
## $time - timestamp of line
## $buf - buffer name
## $nick - nick
## the first entry of @CFG_TABLE_2 that is not newer than $time and whose
## buffer and nick patterns both match is used
## returns rule if found or undef
sub find_rule {
    my ($time, $buf, $nick) = @_;
    my $match;
    for my $cand (@CFG_TABLE_2) {
        next unless $cand->{_}{time} <= $time;
        next unless $buf =~ $cand->{buf_re};
        next unless $nick =~ $cand->{nick_re};
        $match = $cand;
        last;
    }
    return $match;
}
## apply_recode -- recode a line, looking up its rule first
## the stored raw bytes of the line are decoded with the charsets of the
## matching rule, in order; the line in weechat is updated if the resulting
## encoding differs from the one currently shown
## $lp - pointer to 'line' hdata
sub apply_recode {
my $lp = shift;
# \undef stands for "no rule" so that rules can always be compared with ==
my $rule = find_rule($MSG_TIME{$lp}, $MSG_BUF{$lp}, $MSG_NICK{$lp})//\undef;
# same rule as the one applied last time: nothing to do
return if $rule == $MSG_FLT{$lp};
# $s: resulting text, $e: name of the encoding that produced it
my ($s, $e);
# 'x' (do not decode) is always tried last as fallback
for my $enc ((($rule == \undef) ? () : @{$rule->{_}{charsets}}), 'x') {
$s = $BYTE_MSGS{$lp};
if ($enc eq 'x') {
esc1($s);
$e = $enc;
last;
}
else {
my $enc2 = $enc;
# a trailing "!" marks that a partial decode is acceptable
my $partial = $enc2 =~ s/!$//;
next if $enc2 eq 'hz' && $s =~ /[^\000-\177]/; # hack for hz
# put further hacks here...
# whatever is left in $s after this call was not decoded
my $t = $DEC{$enc2}->decode($s, Encode::FB_QUIET); # FB_CROAK not reliable
#$t =~ s/[[:cntrl:]]//g;
if (length $t && !length $s) { # decoding succeeds
$s = $t;
$e = $enc2;
last;
}
elsif (length $t && $partial) {
# keep the decoded part, followed by a marker and the escaped remainder
esc1($s);
$s = $t . '<?>' . $s;
$e = $enc2 . '_loss';
last;
}
}
}
if ($MSG_ENC{$lp} ne $e) {
my @line_data = Nlib::hdh((sprintf '0x%x', $lp), 'line', 'data');
# current tags of the line without the lma_ tags of an earlier recode
my @tags = grep { !/^lma_/ } map { Nlib::hdh(@line_data, "$_|tags_array") }
0 .. Nlib::hdh(@line_data, 'tags_count')-1;
# put the saved control sequences back in place of the \01 placeholders
my @ctrl_res = split "\0", $MSG_COLOR{$lp}, -1;
my $c = 1;
$s =~ s/\01+/$ctrl_res[$c++]/g;
# decoded lines are tagged with the encoding and the address of the rule
Nlib::hdh(@line_data, +{
message => $s,
tags_array => (join ',', (($e eq 'x') ? () : ("lma_$e", (sprintf 'lma_0x%x', $rule))), @tags),
});
$MSG_ENC{$lp} = $e;
}
$MSG_FLT{$lp} = $rule;
}
## line_sig -- decode charset previously replaced and fix up outgoing msgs
## () - signal handler
## $_[2] - line ptr
## returns WEECHAT_RC_OK
sub line_sig {
my @line_data = Nlib::hdh($_[2], 'line', 'data');
# numeric value of the pointer string, used as key for all per-line state
my $lp = oct $_[2];
$ASYNC_BUF{$lp} = undef if $ASYNC_TIMER; # we are still in async reread loop, mark this line as seen
# only lines in buffers of the irc plugin ...
return weechat::WEECHAT_RC_OK unless Nlib::hdh(@line_data, 'buffer', 'plugin', 'name') eq 'irc';
my @tags = map { Nlib::hdh(@line_data, "$_|tags_array") }
0 .. Nlib::hdh(@line_data, 'tags_count')-1;
# ... that carry one of the watched tags ...
return weechat::WEECHAT_RC_OK unless grep /$nag_tag/i, @tags;
my $message_c = Nlib::hdh(@line_data, 'message');
# ... and contain the \020 escape bracket are of interest
return weechat::WEECHAT_RC_OK unless $message_c =~ /\020/;
# $message_c keeps the original message, the other two are copies of the
# result of string_remove_color called with "\1" as second argument
# NOTE(review): "\1" is presumably the replacement put in place of colour codes -- confirm against the weechat API
my $message = my $message_nc = weechat::string_remove_color($message_c, "\1");
if (defined $_[0] && grep { $_ eq 'no_highlight' } @tags) { # might be own msg, $_[0] == undef on history parsing
my $action_pfx_re = qr//;
if (grep { $_ eq 'irc_action' } @tags) {
# XXX might erroneously recode the in-line prefix (utf8 nicks anyone? ircx?!)
$action_pfx_re = qr/\S+ \K/;
}
my $codecs = join '|', map { quotemeta } sort { length $b <=> length $a } keys %DEC;
# \02010\020 is the escaped form of \020 itself, not an encode marker
if ($message =~ /^\02010\020/) {} # fall through
elsif ($message =~ s/^$action_pfx_re\020($codecs)\020//) {
my $dec = $1;
my @ctrl_res;
if ($message_nc =~ /\01/) {
# turn the text into a pattern in which each run of \01 is a capture
# group, then match it against the original message to get the
# control sequences
my $id_control = quotemeta $message_nc;
$id_control =~ s/(\\\01)+/(.+?)/g;
@ctrl_res = $message_c =~ /^()$id_control()$/;
}
$message_nc =~ s/^$action_pfx_re\020\Q$dec\E\020//;
Encode::_utf8_on($message);
# encode and decode again with the requested charset; a difference
# to the original text means the encode is lossy
my $s = $DEC{$dec}->decode($DEC{$dec}->encode($message, Encode::FB_DEFAULT), Encode::FB_DEFAULT);
my $not_equal = $s ne $message;
my $c = 1;
$s =~ s/\01+/$ctrl_res[$c++]/g;
if ($not_equal && weechat::config_string_to_boolean(weechat::config_get_plugin('encode_warn'))) {
$s .= ' '.weechat::color('chat_prefix_error').'[warning: lossy encode]';
}
Nlib::hdh(@line_data, +{
message => $s,
tags_array => (join ',', "lmaout_$dec", ($not_equal ? "lmaout_loss" : ()), @tags),
});
return weechat::WEECHAT_RC_OK
}
}
# turn \020<hex>\020 escapes back into the characters they stand for; done if there are none
# XXX bad hack: \01* might be sprinkled from colorize_nicks, but will mess up later on color restore
$message =~ s/\020\01*([[:xdigit:]]+)\01*\020/chr hex $1/ge || return weechat::WEECHAT_RC_OK;
my @ctrl_res;
if ($message_nc =~ /\01/) {
# same control sequence extraction as for own messages above
my $id_control = quotemeta $message_nc;
$id_control =~ s/(\\\01)+/(.+?)/g;
@ctrl_res = $message_c =~ /^()$id_control()$/;
}
if (esc_only($message)) {
# no recode needed: show the unescaped message and only note the line in %ESC_MSG
my $c = 1;
$message =~ s/\01+/$ctrl_res[$c++]/g;
Nlib::hdh(@line_data, +{ message => $message });
$ESC_MSG{$lp} = undef;
return weechat::WEECHAT_RC_OK
}
# remember everything needed to recode this line, now and on rule changes
$BYTE_MSGS{$lp} = $message;
$MSG_COLOR{$lp} = join "\0", @ctrl_res;
$MSG_TIME{$lp} = 0+Nlib::hdh(@line_data, 'date');
$MSG_BUF{$lp} = Nlib::hdh(@line_data, 'buffer', 'name');
my ($nick_tag) = grep s/^nick_//, @tags;
$MSG_NICK{$lp} = $nick_tag//'';
# initial state: not decoded ('x'), no rule applied yet
$MSG_ENC{$lp} = 'x';
$MSG_FLT{$lp} = \undef;
apply_recode($lp);
# clean up the stored lines once more than $GC_LIMIT have been added
if (defined $GC_LIMIT && ++$GC_COUNT > $GC_LIMIT) {
gc_lines('int');
$GC_COUNT = 0;
}
weechat::WEECHAT_RC_OK
}
## hook_encode_commands -- hook irc commands needed to add encode prefix
## $_[0] - tag name (privmsg, notice, topic or part)
## every command is forwarded to auto_encode_cmd together with the name of the
## parse style to use: /notice is parsed like query, /part and /cycle like wallchops
## returns the list of installed hooks, empty for any other tag name
sub hook_encode_commands {
    my $tag = $_[0];
    return (
        weechat::hook_command_run('/me', 'auto_encode_cmd', 'me'),
        weechat::hook_command_run('/msg', 'auto_encode_cmd', 'msg'),
        weechat::hook_command_run('/query', 'auto_encode_cmd', 'query'),
    ) if $tag eq 'privmsg';
    return (
        weechat::hook_command_run('/notice', 'auto_encode_cmd', 'query'),
        weechat::hook_command_run('/wallchops', 'auto_encode_cmd', 'wallchops'),
    ) if $tag eq 'notice';
    return (
        weechat::hook_command_run('/topic', 'auto_encode_cmd', 'topic'),
    ) if $tag eq 'topic';
    return (
        weechat::hook_command_run('/part', 'auto_encode_cmd', 'wallchops'),
        weechat::hook_command_run('/cycle', 'auto_encode_cmd', 'wallchops'),
    ) if $tag eq 'part';
    return ();
}
# /lma set <time> <network.buffer|*> <nick|*|+> <encodings...>
# 0 network.* iso-user iso
# 0 network.#channel * utf8 --> iso-user always gets iso
# 0 network.* iso-user iso
# 0 network.#channel + utf8 --> iso-user gets utf8 in #channel
## load_config -- unconditionally try to pipe config file to /lma set
## every line of $CFG_FILE_NAME is split on whitespace and handed to lma_set
## together with the marker 'conf' and its line number
## a config file that exists but cannot be opened is reported, not fatal
## returns WEECHAT_RC_OK
sub load_config {
    weechat::mkdir_home('', 0755);
    return weechat::WEECHAT_RC_OK unless -e $CFG_FILE_NAME;
    # "or", not "||": the latter binds to the file name, so that a failed open
    # would go unnoticed
    open my $lfh, '<:utf8', $CFG_FILE_NAME or do {
        weechat::print('', Nlib::fu8on(weechat::prefix('error'))."@{[SCRIPT_NAME]}: cannot read $CFG_FILE_NAME: $!");
        return weechat::WEECHAT_RC_OK;
    };
    # lexical line variable, the global $_ is left alone
    while (my $line = <$lfh>) {
        chomp $line;
        lma_set('conf', $., split ' ', $line);
    }
    close $lfh;
    weechat::WEECHAT_RC_OK
}
## save_config -- unconditionally try to save config
## writes one line per decode rule (time, buffer, nick, charsets) followed by
## one "-out" line per output encoding rule (buffer, pattern, charset)
## failure to open or to completely write the file is reported, not fatal
## returns WEECHAT_RC_OK
sub save_config {
    weechat::mkdir_home('', 0755);
    # "or", not "||": the latter binds to the file name, so that a failed open
    # would go unnoticed
    open my $sfh, '>:utf8', $CFG_FILE_NAME or do {
        weechat::print('', Nlib::fu8on(weechat::prefix('error'))."@{[SCRIPT_NAME]}: cannot write $CFG_FILE_NAME: $!");
        return weechat::WEECHAT_RC_OK;
    };
    {
        # fields separated by a space, one record per line
        local $, = ' '; local $\ = "\n";
        for (@CFG_TABLE) {
            print $sfh @{$_}{qw(time buf nick)}, @{$_->{charsets}};
        }
        for (@ENCODE_TABLE) {
            print $sfh '-out', $_->{buf}, @{$_->{pat}}, $_->{charset};
        }
    }
    # errors of buffered writes only show up on close
    close $sfh
        or weechat::print('', Nlib::fu8on(weechat::prefix('error'))."@{[SCRIPT_NAME]}: error writing $CFG_FILE_NAME: $!");
    weechat::WEECHAT_RC_OK
}
## display_time -- pretty print time
## $now - current time
## $time - time to display
## returns, depending on how far $time lies before $now:
##   "+"             $time is in the future
##   "-<n>s"/"-<n>m" less than a minute / less than an hour
##   "HH:MM"         less than a day (local time)
##   "Mon<day>"      less than 31556926 seconds (local time)
##   "-"             anything older
sub display_time {
    my ($now, $time) = @_;
    my $age = $now - $time;
    return "+" if $age < 0;
    return "-${age}s" if $age < 60;
    return "-@{[int($age/60)]}m" if $age < 60 * 60;
    if ($age < 60 * 60 * 24) {
        my (undef, $min, $hour) = localtime $time;
        return sprintf '%02d:%02d', $hour, $min;
    }
    if ($age < 31_556_926) {
        my ($mday, $month) = (localtime $time)[3, 4];
        return sprintf '%3s%2d', ucfirst $mon[$month], $mday;
    }
    return '-';
}
## lma_list -- list decode configuration
## prints the decode rules of @CFG_TABLE as a table, followed by the output
## encoding rules of @ENCODE_TABLE if there are any
## () - forwarded command handler
## $_[2] - must not be present, any further argument is reported as error
## returns WEECHAT_RC_OK
sub lma_list {
if (@_ > 2) {
weechat::print('', Nlib::fu8on(weechat::prefix('error'))."Error: unknown option for \"@{[CMD_NAME]} list\" command: $_[2]");
return weechat::WEECHAT_RC_OK
}
# column widths: the widest value per column, header included
my %lengths;
my $now = time;
# the header is shaped like a rule so that it takes part in the width calculation
my %header = ( time => (sprintf '%*s', (length $now), 'ts'), buf => 'buf', nick => 'nick', charsets => ['charsets']);
for my $ent (\%header, @CFG_TABLE) {
for (qw(time buf nick)) {
my $len = length $ent->{$_};
$lengths{$_} = $len unless ($lengths{$_}//0) >= $len
}
my $cs_len = length join ' ', @{$ent->{charsets}};
$lengths{charsets} = $cs_len unless ($lengths{charsets}//0) >= $cs_len;
}
# negative widths left-justify the column
my $hdr = sprintf '%*s(%*s)%*s %*s %*s', -$lengths{time}, $header{time}, 5, 'when', (map { -$lengths{$_}, $header{$_} } qw(buf nick)), -$lengths{charsets}, @{$header{charsets}};
weechat::print('', $hdr);
weechat::print('', '-'x(length $hdr));
# one row per rule: timestamp, its short form from display_time, buffer, nick, charsets
for my $ent (@CFG_TABLE) {
weechat::print('', sprintf '%*s %*s %*s %*s'.(' %s'x@{$ent->{charsets}}),
$lengths{time}, $ent->{time}, 5, display_time($now, $ent->{time}), (map { -$lengths{$_}, $ent->{$_} } qw(buf nick)), @{$ent->{charsets}});
}
# separator only if both tables are shown
if (@CFG_TABLE && @ENCODE_TABLE) {
weechat::print('', '-'x(length $hdr));
}
return weechat::WEECHAT_RC_OK unless @ENCODE_TABLE;
# second table: output encodings, widths calculated the same way
my %enc_lengths;
my %enc_header = (buf => 'buf', pat => ['pattern'], charset => 'charset');
for my $ent (\%enc_header, @ENCODE_TABLE) {
for (qw(buf charset)) {
my $len = length $ent->{$_};
$enc_lengths{$_} = $len unless ($enc_lengths{$_}//0) >= $len
}
my $pat_len = length join ' ', @{$ent->{pat}};
$enc_lengths{pat} = $pat_len unless ($enc_lengths{pat}//0) >= $pat_len;
}
# the first column is as wide as the timestamp column of the first table plus 6
my $enc_hdr = sprintf '%*s %*s %*s %*s', -$lengths{time}-6, 'output encodings', -$enc_lengths{buf}, $enc_header{buf}, -$enc_lengths{pat}, @{$enc_header{pat}}, -$enc_lengths{charset}, $enc_header{charset};
weechat::print('', $enc_hdr);
weechat::print('', '-'x(length $enc_hdr));
for my $ent (@ENCODE_TABLE) {
weechat::print('', sprintf '%*s %*s %*s %*s',
-$lengths{time}-6, '', -$enc_lengths{buf}, $ent->{buf}, -$enc_lengths{pat}, (join ' ', @{$ent->{pat}}), -$enc_lengths{charset}, $ent->{charset});
}
weechat::WEECHAT_RC_OK
}
## rel_time -- replace date/time specifications and m/h/d postfix to negative time
## $now - current time
## $_[1..$#_] - strings to modify (changed in place)
## supported forms, each also as range "from-to" whose ends are handled separately:
##   [<>][MonDD][HH:MM[:SS]] - replaced by the unix timestamp of the last such
##                             moment that is not after $now, prefix kept
##   -<n>m, -<n>h, -<n>d     - replaced by minus the number of seconds
## anything else is left as it is
sub rel_time {
    my %td = (m => 60, h => 60 * 60, d => 60 * 60 * 24);
    my $now = shift;
    for (@_) {
        # a "-" that is not the leading sign separates the two ends of a range
        my ($from, $to) = split /(?<!^)-/, $_, 2;
        if (defined $to) {
            rel_time($now, $from, $to);
            $_ = "$from-$to";
        }
        else {
            my @lt = localtime $now;
            # day of month is 01..31 (the former [0-2][1-9] rejected the 10th and 20th)
            s{^([><])?(?:($mon_re)(0[1-9]|[12][0-9]|3[01]))?(?:(?:([01]?[0-9]|2[0-3]):([0-5][0-9]))(?::([0-5][0-9]))?)?$}{
                # parts that are not given default to the current date/time
                my ($pfx, $mon, $day, $hr, $min, $sec) = ($1//'', (defined $2 ? $mon{lc $2} : $lt[4]), $3//$lt[3], $4//$lt[2], $5//$lt[1], $6//$lt[0]);
                my $yr = $lt[5];
                my $day_back;
                # a date later in the year than today refers to last year
                if ($mon > $lt[4] || ($mon == $lt[4] && $day > $lt[3])) { --$yr }
                # a time later than now on today's date refers to yesterday
                elsif ($mon == $lt[4] && $day == $lt[3] &&
                    ($hr > $lt[2] ||
                    ($hr == $lt[2] && ($min > $lt[1] ||
                    ($min == $lt[1] && $sec > $lt[0]))))) { $day_back = 1 }
                my $lt = timelocal($sec, $min, $hr, $day, $mon, $yr);
                $lt -= 60 * 60 * 24 if $day_back;
                $pfx.$lt
            }ie ||
            s/(?<!-)-(\d+)([mhd])/-($1 * $td{$2})/eg;
        }
    }
}
## lma_print_fmt -- print line of rule table with display_time and message prefix
## $now - time to use as base for calculations
## $msg - message prefix to show
## $time - timestamp of rule
## @rest - other string fields to print
## the line reads "<msg>: <time> (<display_time>) <rest...>" and goes to the core buffer
sub lma_print_fmt {
    my ($now, $msg, $time, @rest) = @_;
    my $when = display_time($now, $time);
    my @fields = ("$msg:", $time, "($when)", @rest);
    return weechat::print('', join ' ', @fields);
}
## update_table2 -- create rule table from settings table
## fills @CFG_TABLE_2 with one entry per @CFG_TABLE entry, holding
##   buf_len, nick_len - specificity score: twice the length, minus 3 per "*"
##                       (nick also minus 1 for a trailing "+")
##   prio              - true if nick ends with "+" (buffer is more important)
##   buf_re, nick_re   - compiled case-insensitive patterns of the wildcards
##   _                 - the settings entry itself
## sorted so that the first matching entry is the rule to use: "+" rules first;
## among those higher buffer score before higher nick score, otherwise the
## other way round; newer rules before older ones
sub update_table2 {
    @CFG_TABLE_2 =
        sort {
            $b->{prio} <=> $a->{prio} ||
            # hash access on both sides: "$b->prio" was a method call on an
            # unblessed reference and died as soon as two "+" rules were compared
            ($a->{prio} && $b->{prio} ? (
                $b->{buf_len} <=> $a->{buf_len} ||
                $a->{_}{buf} cmp $b->{_}{buf} ||
                $b->{nick_len} <=> $a->{nick_len} ||
                $a->{_}{nick} cmp $b->{_}{nick}) : (
                $b->{nick_len} <=> $a->{nick_len} ||
                $a->{_}{nick} cmp $b->{_}{nick} ||
                $b->{buf_len} <=> $a->{buf_len} ||
                $a->{_}{buf} cmp $b->{_}{buf})) ||
            $b->{_}{time} <=> $a->{_}{time}
        }
        map {
            +{
                buf_len => (2 * (length $_->{buf}) - 3 * ($_->{buf} =~ y/*//)),
                nick_len => (2 * (length $_->{nick}) - 3 * ($_->{nick} =~ y/*//) - ($_->{nick} =~ /\+$/)),
                prio => !!($_->{nick} =~ /\+$/),
                buf_re => do {
                    my $buf = $_->{buf};
                    wildcard_to_re($buf);
                    qr/^$buf$/i },
                nick_re => do {
                    my $nick = $_->{nick};
                    # the "+" is a flag, not part of the nick; a lone "+" means any nick
                    $nick =~ s/\+$//;
                    $nick = '*' unless length $nick;
                    wildcard_to_re($nick);
                    qr/^$nick$/i },
                '_' => $_,
            }
        } @CFG_TABLE
}
## lma_del_out -- delete output encoding
## $_[0] - 'int' if internal (no summary is printed then)
## $buf - target specification
## $glob - set if last param starts with -g XXX
## @pat - match pattern
## with $glob, $buf and @pat go through wildcard_to_re and are matched as
## patterns against the entries; otherwise they are compared literally
## entries without charset are dropped as well; the remaining ones are kept
## sorted by buffer and pattern
## returns WEECHAT_RC_OK
sub lma_del_out {
    my ($origin, $buf, $glob, @pat) = @_;
    my $internal = $origin eq 'int';
    if ($glob) {
        wildcard_to_re($buf, @pat);
    }
    else {
        for my $part ($buf, @pat) {
            $part = quotemeta $part;
        }
    }
    my $pat_re = join '\s+', @pat;
    my $num_cfg = @ENCODE_TABLE;
    my @kept;
    for my $ent (@ENCODE_TABLE) {
        next unless length $ent->{charset};
        next if $ent->{buf} =~ /^\s*$buf\s*$/i
            && (join ' ', @{$ent->{pat}}) =~ /^\s*$pat_re\s*$/i;
        push @kept, $ent;
    }
    @ENCODE_TABLE = sort {
        $a->{buf} cmp $b->{buf}
            || (join ' ', @{$a->{pat}}) cmp (join ' ', @{$b->{pat}})
    } @kept;
    update_enc_table();
    weechat::print('', "Removed @{[$num_cfg-@ENCODE_TABLE]} entries from @{[SCRIPT_NAME]} list") unless $internal;
    return weechat::WEECHAT_RC_OK;
}
## update_enc_table -- create output encoding lookup table from settings table
## fills @ENCODE_TABLE2 with one entry per @ENCODE_TABLE entry, holding
##   pat              - the pattern words joined by a space
##   buf_len, pat_len - specificity score: twice the length, minus 3 per "*"
##   buf_re, pat_re   - compiled case-insensitive patterns
##   _                - the settings entry itself
## sorted by higher buffer score, buffer name, higher pattern score, pattern
sub update_enc_table {
@ENCODE_TABLE2 =
sort {
$b->{buf_len} <=> $a->{buf_len} ||
$a->{_}{buf} cmp $b->{_}{buf} ||
$b->{pat_len} <=> $a->{pat_len} ||
$a->{pat} cmp $b->{pat}
}
map {
my $pat = join ' ', @{$_->{pat}};
+{
pat => $pat,
buf_len => (2 * (length $_->{buf}) - 3 * ($_->{buf} =~ y/*//)),
pat_len => (2 * (length $pat) - 3 * ($pat =~ y/*//)),
buf_re => do {
my $buf = $_->{buf};
wildcard_to_re($buf);
qr/^$buf$/i },
pat_re => do {
my @pat = @{$_->{pat}};
wildcard_to_re(@pat);
# inside a word a wildcard only spans non-space characters; a word that
# is just a wildcard keeps matching anything, and a doubled wildcard is
# turned back into one that matches anything
for (@pat) {
s/\.\*/\\S*/g unless $_ eq '.*'; # wasn't *
s/(?:\\S\*){2,}/.*/g; # reverse with **
}
# the words of the pattern may be separated by any amount of white-space
my $pat_re = join '\s+', @pat;
qr/^$pat_re$/i },
'_' => $_,
}
} @ENCODE_TABLE
}
## lma_set_out -- modify output encoding
## $_[0] - 'conf' if called from config (nothing is printed then)
## $cs - charset, an undefined or empty charset removes the entry
## $buf - target specification
## @pat - match pattern
## an existing entry for the same buffer and pattern is deleted through lma_del
## before the new one is added; the table is kept sorted by buffer and pattern
## returns WEECHAT_RC_OK
sub lma_set_out {
    my ($origin, $cs, $buf, @pat) = @_;
    my $conf = defined $origin && $origin eq 'conf';
    $cs = '' unless defined $cs;
    lma_del('int', undef, '-out', $buf, @pat);
    push @ENCODE_TABLE, +{ buf => $buf, pat => \@pat, charset => $cs };
    unless ($conf) {
        my $msg;
        if (length $cs) {
            $msg = 'added';
        }
        else {
            $msg = 'removed';
        }
        weechat::print('', join ' ', "$msg:", '-out', $buf, @pat, $cs);
    }
    # entries without charset (the one just pushed on removal) are dropped again
    my @valid = grep { length $_->{charset} } @ENCODE_TABLE;
    @ENCODE_TABLE = sort {
        $a->{buf} cmp $b->{buf}
            || (join ' ', @{$a->{pat}}) cmp (join ' ', @{$b->{pat}})
    } @valid;
    update_enc_table();
    return weechat::WEECHAT_RC_OK;
}
## lma_set -- add or modify a rule entry
## () - forwarded command handler
## $_[0] - 'conf' if called from config load
## $time - timestamp
## $buf - buffer
## $nick - nick
## @charsets - list of charsets to try in order
sub lma_set {
my (undef, undef, $time, $buf, $nick, @charsets) = @_;
my $conf = ($_[0]//'') eq 'conf';
my $conf_err = ($conf ? ", @{[SCRIPT_NAME]}.conf line $_[1]" : '');
unless (defined $nick) {
weechat::print('', Nlib::fu8on(weechat::prefix('error'))."@{[SCRIPT_NAME]}: too few arguments for \"@{[CMD_NAME]} set\" command$conf_err");
return weechat::WEECHAT_RC_OK
}
my $now = time;
rel_time($now, $time);
my ($out_p, $recode_p);
if (lc $time eq '-out') {
$out_p = 1;
}
elsif ($time !~ /^-?\d+$/) {
weechat::print('', Nlib::fu8on(weechat::prefix('error'))."Error: incorrect number: $time in \"@{[CMD_NAME]} set\" command$conf_err");
return weechat::WEECHAT_RC_OK
}
else {
$recode_p = 1;
$time += $now unless $time > 0;
}
my @out_pattern;
if ($out_p) {
my $cs = pop @charsets;