Skip to content

Commit 5990960

Browse files
authored
Drop support for Python older than 3.8 (#11)
1 parent cde8733 commit 5990960

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+182
-408
lines changed

benchmarks/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +0,0 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import

benchmarks/bench.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
31
"""
42
Pymorphy2 benchmark utility.
53
@@ -15,8 +13,9 @@
1513
1614
"""
1715
import logging
18-
import sys
1916
import os
17+
import sys
18+
2019
from docopt import docopt
2120

2221
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

benchmarks/shrink-unigrams.ipynb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
"cell_type": "code",
1919
"collapsed": false,
2020
"input": [
21-
"from __future__ import unicode_literals\n",
2221
"import random\n",
2322
"import math\n",
2423
"random.seed(0)\n",

benchmarks/speed.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import, unicode_literals, division
3-
import logging
41
import codecs
5-
import os
6-
import functools
72
import datetime
3+
import functools
4+
import logging
5+
import os
86

9-
from pymorphy2 import MorphAnalyzer
107
from benchmarks import utils
8+
from pymorphy2 import MorphAnalyzer
119

1210
logger = logging.getLogger('pymorphy2.bench')
1311

benchmarks/utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import, unicode_literals, division
1+
import gc
32
import time
43
import timeit
5-
import gc
4+
65

76
def measure(func, inner_iterations=1, repeats=5):
87
"""

docs/conf.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
#
31
# pymorphy2 documentation build configuration file, created by
42
# sphinx-quickstart on Sun Jul 29 04:34:30 2012.
53
#
@@ -10,9 +8,9 @@
108
#
119
# All configuration values have a default; values that are commented out
1210
# serve to show the default.
13-
from __future__ import unicode_literals
1411

15-
import sys, os
12+
import os
13+
import sys
1614

1715
# If extensions (or modules to document with autodoc) are in another directory,
1816
# add these directories to sys.path here. If the directory is relative to the

pymorphy2/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
1-
# -*- coding: utf-8 -*-
2-
from .version import __version__
31
from .analyzer import MorphAnalyzer
2+
from .version import __version__

pymorphy2/analyzer.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import print_function, unicode_literals, division
3-
import os
4-
import heapq
51
import collections
2+
import heapq
63
import logging
7-
import threading
84
import operator
5+
import os
6+
import threading
97
import warnings
108

9+
import pymorphy2.lang
1110
from pymorphy2 import opencorpora_dict
1211
from pymorphy2.dawg import ConditionalProbDistDAWG
13-
import pymorphy2.lang
1412

1513
logger = logging.getLogger(__name__)
1614

@@ -66,7 +64,7 @@ def normalized(self):
6664
# return self._dict.build_paradigm_info(self.para_id)
6765

6866

69-
class ProbabilityEstimator(object):
67+
class ProbabilityEstimator:
7068
def __init__(self, dict_path):
7169
cpd_path = os.path.join(dict_path, 'p_t_given_w.intdawg')
7270
self.p_t_given_w = ConditionalProbDistDAWG().load(cpd_path)
@@ -140,14 +138,12 @@ def lang_dict_path(lang):
140138
return lang_paths[lang]
141139

142140
raise ValueError(
143-
"Can't find a dictionary for language %r. Installed languages: %r. "
144-
"Try installing pymorphy2-dicts-%s package." % (
145-
lang, list(lang_paths.keys()), lang
146-
)
141+
f"Can't find a dictionary for language {lang!r}. Installed languages: {list(lang_paths.keys())!r}. "
142+
f"Try installing pymorphy2-dicts-{lang} package."
147143
)
148144

149145

150-
class MorphAnalyzer(object):
146+
class MorphAnalyzer:
151147
"""
152148
Morphological analyzer for Russian language.
153149
@@ -293,8 +289,8 @@ def choose_language(cls, dictionary, lang):
293289
if dictionary.lang != lang:
294290
# allow incorrect 'lang' values, but show a warning
295291
warnings.warn(
296-
"Dictionary language (%r) doesn't match "
297-
"analyzer language (%r)." % (dictionary.lang, lang)
292+
f"Dictionary language ({dictionary.lang!r}) doesn't match "
293+
f"analyzer language ({lang!r})."
298294
)
299295

300296
return lang

pymorphy2/cache.py

Lines changed: 0 additions & 49 deletions
This file was deleted.

pymorphy2/cli.py

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import
3-
from __future__ import unicode_literals, print_function, division
4-
5-
import sys
61
import logging
7-
import time
8-
import codecs
92
import operator
3+
import sys
4+
import time
5+
from functools import lru_cache
106

117
import pymorphy2
12-
from pymorphy2.cache import lru_cache, memoized_with_single_argument
13-
from pymorphy2.utils import get_mem_usage
148
from pymorphy2.tokenizers import simple_word_tokenize
15-
16-
PY2 = sys.version_info[0] == 2
9+
from pymorphy2.utils import get_mem_usage
1710

1811
# Hacks are here to make docstring compatible with both
1912
# docopt and sphinx.ext.autodoc.
@@ -80,10 +73,7 @@ def main(argv=None):
8073
else:
8174
score, lemmatize, tag = True, True, True
8275

83-
if PY2:
84-
out_file = codecs.getwriter('utf8')(sys.stdout)
85-
else:
86-
out_file = sys.stdout
76+
out_file = sys.stdout
8777

8878
return parse(
8979
morph=morph,
@@ -112,14 +102,9 @@ def main(argv=None):
112102
def _open_for_read(fn):
113103
""" Open a file for reading """
114104
if fn in ['-', '', None]:
115-
if PY2:
116-
return codecs.getreader('utf8')(sys.stdin)
117-
else:
118-
return sys.stdin
119-
if PY2:
120-
return codecs.open(fn, 'rt', encoding='utf8')
121-
else:
122-
return open(fn, 'rt', encoding='utf8')
105+
return sys.stdin
106+
107+
return open(fn, 'rt', encoding='utf8')
123108

124109

125110
# ============================ Commands ===========================
@@ -178,7 +163,7 @@ def parse(morph, in_file, out_file, tokenize, score, normal_form, tag,
178163

179164
_parse = parser.parse
180165
if cache_size == 'unlim':
181-
_parse = memoized_with_single_argument({})(_parse)
166+
_parse = lru_cache(None)(_parse)
182167
else:
183168
cache_size = int(cache_size)
184169
if cache_size:
@@ -189,7 +174,7 @@ def parse(morph, in_file, out_file, tokenize, score, normal_form, tag,
189174
_write(_parse(token))
190175

191176

192-
class _TokenParserFormatter(object):
177+
class _TokenParserFormatter:
193178
"""
194179
This class defines its `parse` method based on arguments passed.
195180
Some ugly code is to make all ifs work only once, not for each token.
@@ -218,14 +203,14 @@ def __init__(self, morph, score, normal_form, tag, newlines, thresh):
218203
if score:
219204
def _parse_token(tok):
220205
seq = [
221-
"%s:%0.3f=%s" % (p.normal_form, p.score, p.tag)
206+
f"{p.normal_form}:{p.score:0.3f}={p.tag}"
222207
for p in morph_parse(tok) if p.score >= thresh
223208
]
224209
return tpl % (tok, join(seq))
225210
else:
226211
def _parse_token(tok):
227212
seq = [
228-
"%s:%s" % (p.normal_form, p.tag)
213+
f"{p.normal_form}:{p.tag}"
229214
for p in morph_parse(tok) if p.score >= thresh
230215
]
231216
return tpl % (tok, join(seq))
@@ -241,7 +226,7 @@ def _parse_token(tok):
241226
key=val, reverse=True
242227
)
243228
if score:
244-
seq = ["%s:%0.3f" % (lemma, w) for (lemma, w) in items]
229+
seq = [f"{lemma}:{w:0.3f}" for (lemma, w) in items]
245230
else:
246231
seq = [lemma for (lemma, w) in items]
247232

@@ -250,7 +235,7 @@ def _parse_token(tok):
250235
if score:
251236
def _parse_token(tok):
252237
seq = [
253-
"%0.3f=%s" % (p.score, p.tag)
238+
f"{p.score:0.3f}={p.tag}"
254239
for p in morph_parse(tok) if p.score >= thresh
255240
]
256241
return tpl % (tok, join(seq))

pymorphy2/dawg.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import, division
3-
41
try:
52
from dawg import DAWG, RecordDAWG, IntCompletionDAWG
63
EXTENSION_AVAILABLE = True
@@ -25,14 +22,14 @@ class WordsDawg(RecordDAWG):
2522
# We are storing 2 unsigned short ints as values:
2623
# the paradigm ID and the form index (inside paradigm).
2724
# Byte order is big-endian (this makes word forms properly sorted).
28-
DATA_FORMAT = str(">HH")
25+
DATA_FORMAT = ">HH"
2926

3027
def __init__(self, data=None):
3128
if data is None:
32-
super(WordsDawg, self).__init__(self.DATA_FORMAT)
29+
super().__init__(self.DATA_FORMAT)
3330
else:
3431
assert_can_create()
35-
super(WordsDawg, self).__init__(self.DATA_FORMAT, data)
32+
super().__init__(self.DATA_FORMAT, data)
3633

3734

3835
class PredictionSuffixesDAWG(WordsDawg):
@@ -43,7 +40,7 @@ class PredictionSuffixesDAWG(WordsDawg):
4340
# We are storing 3 unsigned short ints as values:
4441
# count, the paradigm ID and the form index (inside paradigm).
4542
# Byte order is big-endian (this makes word forms properly sorted).
46-
DATA_FORMAT = str(">HHH")
43+
DATA_FORMAT = ">HHH"
4744

4845

4946
class ConditionalProbDistDAWG(IntCompletionDAWG):
@@ -52,17 +49,17 @@ class ConditionalProbDistDAWG(IntCompletionDAWG):
5249

5350
def __init__(self, data=None):
5451
if data is None:
55-
super(ConditionalProbDistDAWG, self).__init__()
52+
super().__init__()
5653
else:
5754
assert_can_create()
5855
dawg_data = (
59-
("%s:%s" % (word, tag), int(prob*self.MULTIPLIER))
56+
(f"{word}:{tag}", int(prob * self.MULTIPLIER))
6057
for (word, tag), prob in data
6158
)
62-
super(ConditionalProbDistDAWG, self).__init__(dawg_data)
59+
super().__init__(dawg_data)
6360

6461
def prob(self, word, tag):
65-
dawg_key = "%s:%s" % (word, tag)
62+
dawg_key = f"{word}:{tag}"
6663
return self.get(dawg_key, 0) / self.MULTIPLIER
6764

6865

@@ -71,7 +68,7 @@ def is_prefixed(self, word):
7168
return bool(self.prefixes(word))
7269

7370

74-
class PythonPrefixMatcher(object):
71+
class PythonPrefixMatcher:
7572
def __init__(self, prefixes):
7673
self._prefixes = tuple(prefixes)
7774

pymorphy2/lang/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import
31
from . import ru, uk

pymorphy2/lang/ru/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import
31
from .config import *

pymorphy2/lang/ru/config.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
# -*- coding: utf-8 -*-
21
"""
32
Constants and configuration for Russian language.
43
"""
5-
from __future__ import absolute_import, unicode_literals
64
from pymorphy2 import units
75

86
# paradigm prefixes used for dictionary compilation

pymorphy2/lang/uk/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import
31
from .config import *

pymorphy2/lang/uk/_prefixes.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import absolute_import, unicode_literals
3-
41
# Prefixes which don't change the word parse.
52
# The list is from
63
# https://github.com/languagetool-org/languagetool/blob/master/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/dash_prefixes.txt

pymorphy2/lang/uk/config.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
# -*- coding: utf-8 -*-
21
"""
32
Constants and configuration for Ukrainian language.
43
"""
5-
from __future__ import absolute_import, unicode_literals
64
from pymorphy2 import units
75
from ._prefixes import KNOWN_PREFIXES
86

0 commit comments

Comments
 (0)