pmb.parse.version: Replace token_value() with Token enum (MR 2518)

Hopefully this makes the code easier to read, and potentially also faster
once we have mypyc and the enum can get compiled into plain integers
instead of the strings we were previously working with.
This commit is contained in:
Newbyte 2025-01-01 12:44:23 +01:00 committed by Oliver Smith
parent 510469b304
commit 415893fb74
No known key found for this signature in database
GPG key ID: 5AE7F5513E0885CB
2 changed files with 52 additions and 61 deletions

View file

@ -1,6 +1,6 @@
# Copyright 2024 Stefan Hansson # Copyright 2024 Stefan Hansson
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
from .version import check_string, compare, get_token, parse_suffix, validate from .version import check_string, compare, get_token, parse_suffix, validate, Token
def test_check_string() -> None: def test_check_string() -> None:
@ -19,8 +19,8 @@ def test_compare() -> None:
def test_get_token() -> None: def test_get_token() -> None:
next, value, rest = get_token("letter", "2024.01_rc4") next, value, rest = get_token(Token.LETTER, "2024.01_rc4")
assert next == "digit" assert next == Token.DIGIT
assert value == 50 assert value == 50
assert rest == "024.01_rc4" assert rest == "024.01_rc4"

View file

@ -1,6 +1,7 @@
# Copyright 2023 Oliver Smith # Copyright 2023 Oliver Smith
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
import collections import collections
from enum import IntEnum
""" """
In order to stay as compatible to Alpine's apk as possible, this code In order to stay as compatible to Alpine's apk as possible, this code
@ -10,32 +11,22 @@ https://gitlab.alpinelinux.org/alpine/apk-tools/-/blob/5d796b567819ce91740fcdea7
""" """
def token_value(string: str) -> int: class Token(IntEnum):
""" """
Return the associated value for a given token string (we parse
through the version string one token at a time).
:param string: a token string
:returns: integer associated to the token (so we can compare them in
functions further below, a digit (1) loses against a
letter (2), because "letter" has a higher value).
C equivalent: enum PARTS C equivalent: enum PARTS
""" """
order = {
"invalid": -1, INVALID = -1
"digit_or_zero": 0, DIGIT_OR_ZERO = 0
"digit": 1, DIGIT = 1
"letter": 2, LETTER = 2
"suffix": 3, SUFFIX = 3
"suffix_no": 4, SUFFIX_NO = 4
"revision_no": 5, REVISION_NO = 5
"end": 6, END = 6
}
return order[string]
def next_token(previous: str, rest: str) -> tuple[str, str]: def next_token(previous: Token, rest: str) -> tuple[Token, str]:
""" """
Parse the next token in the rest of the version string, we're Parse the next token in the rest of the version string, we're
currently looking at. currently looking at.
@ -52,41 +43,41 @@ def next_token(previous: str, rest: str) -> tuple[str, str]:
C equivalent: next_token() C equivalent: next_token()
""" """
next = "invalid" next = Token.INVALID
char = rest[:1] char = rest[:1]
# Tokens, which do not change rest # Tokens, which do not change rest
if not len(rest): if not len(rest):
next = "end" next = Token.END
elif previous in ["digit", "digit_or_zero"] and char.islower(): elif previous in [Token.DIGIT, Token.DIGIT_OR_ZERO] and char.islower():
next = "letter" next = Token.LETTER
elif previous == "letter" and char.isdigit(): elif previous == Token.LETTER and char.isdigit():
next = "digit" next = Token.DIGIT
elif previous == "suffix" and char.isdigit(): elif previous == Token.SUFFIX and char.isdigit():
next = "suffix_no" next = Token.SUFFIX_NO
# Tokens, which remove the first character of rest # Tokens, which remove the first character of rest
else: else:
if char == ".": if char == ".":
next = "digit_or_zero" next = Token.DIGIT_OR_ZERO
elif char == "_": elif char == "_":
next = "suffix" next = Token.SUFFIX
elif rest.startswith("-r"): elif rest.startswith("-r"):
next = "revision_no" next = Token.REVISION_NO
rest = rest[1:] rest = rest[1:]
elif char == "-": elif char == "-":
next = "invalid" next = Token.INVALID
rest = rest[1:] rest = rest[1:]
# Validate current token # Validate current token
# Check if the transition from previous to current is valid # Check if the transition from previous to current is valid
if token_value(next) < token_value(previous): if next < previous:
if not ( if not (
(next == "digit_or_zero" and previous == "digit") (next == Token.DIGIT_OR_ZERO and previous == Token.DIGIT)
or (next == "suffix" and previous == "suffix_no") or (next == Token.SUFFIX and previous == Token.SUFFIX_NO)
or (next == "digit" and previous == "letter") or (next == Token.DIGIT and previous == Token.LETTER)
): ):
next = "invalid" next = Token.INVALID
return (next, rest) return (next, rest)
@ -129,7 +120,7 @@ def parse_suffix(rest: str) -> tuple[str, int, bool]:
return (rest, 0, True) return (rest, 0, True)
def get_token(previous: str, rest: str) -> tuple[str, int, str]: def get_token(previous: Token, rest: str) -> tuple[Token, int, str]:
""" """
This function does three things: This function does three things:
* get the next token * get the next token
@ -146,22 +137,22 @@ def get_token(previous: str, rest: str) -> tuple[str, int, str]:
""" """
# Set defaults # Set defaults
value = 0 value = 0
next = "invalid" next = Token.INVALID
invalid_suffix = False invalid_suffix = False
# Bail out if at the end # Bail out if at the end
if not len(rest): if not len(rest):
return ("end", 0, rest) return (Token.END, 0, rest)
# Cut off leading zero digits # Cut off leading zero digits
if previous == "digit_or_zero" and rest.startswith("0"): if previous == Token.DIGIT_OR_ZERO and rest.startswith("0"):
while rest.startswith("0"): while rest.startswith("0"):
rest = rest[1:] rest = rest[1:]
value -= 1 value -= 1
next = "digit" next = Token.DIGIT
# Add up numeric values # Add up numeric values
elif previous in ["digit_or_zero", "digit", "suffix_no", "revision_no"]: elif previous in [Token.DIGIT_OR_ZERO, Token.DIGIT, Token.SUFFIX_NO, Token.REVISION_NO]:
for i in range(len(rest)): for i in range(len(rest)):
while len(rest) and rest[0].isdigit(): while len(rest) and rest[0].isdigit():
value *= 10 value *= 10
@ -169,10 +160,10 @@ def get_token(previous: str, rest: str) -> tuple[str, int, str]:
rest = rest[1:] rest = rest[1:]
# Append chars or parse suffix # Append chars or parse suffix
elif previous == "letter": elif previous == Token.LETTER:
value = ord(rest[0]) value = ord(rest[0])
rest = rest[1:] rest = rest[1:]
elif previous == "suffix": elif previous == Token.SUFFIX:
(rest, value, invalid_suffix) = parse_suffix(rest) (rest, value, invalid_suffix) = parse_suffix(rest)
# Invalid previous token # Invalid previous token
@ -181,8 +172,8 @@ def get_token(previous: str, rest: str) -> tuple[str, int, str]:
# Get the next token (for non-leading zeros) # Get the next token (for non-leading zeros)
if not len(rest): if not len(rest):
next = "end" next = Token.END
elif next == "invalid" and not invalid_suffix: elif next == Token.INVALID and not invalid_suffix:
(next, rest) = next_token(previous, rest) (next, rest) = next_token(previous, rest)
return (next, value, rest) return (next, value, rest)
@ -197,11 +188,11 @@ def validate(version: str) -> bool:
C equivalent: apk_version_validate() C equivalent: apk_version_validate()
""" """
current = "digit" current = Token.DIGIT
rest = version rest = version
while current != "end": while current != Token.END:
(current, value, rest) = get_token(current, rest) (current, value, rest) = get_token(current, rest)
if current == "invalid": if current == Token.INVALID:
return False return False
return True return True
@ -225,8 +216,8 @@ def compare(a_version: str, b_version: str, fuzzy: bool = False) -> int:
""" """
# Defaults # Defaults
a_token = "digit" a_token = Token.DIGIT
b_token = "digit" b_token = Token.DIGIT
a_value = 0 a_value = 0
b_value = 0 b_value = 0
a_rest = a_version a_rest = a_version
@ -234,7 +225,7 @@ def compare(a_version: str, b_version: str, fuzzy: bool = False) -> int:
# Parse A and B one token at a time, until one string ends, or the # Parse A and B one token at a time, until one string ends, or the
# current token has a different type/value # current token has a different type/value
while a_token == b_token and a_token not in ["end", "invalid"] and a_value == b_value: while a_token == b_token and a_token not in [Token.END, Token.INVALID] and a_value == b_value:
(a_token, a_value, a_rest) = get_token(a_token, a_rest) (a_token, a_value, a_rest) = get_token(a_token, a_rest)
(b_token, b_value, b_rest) = get_token(b_token, b_rest) (b_token, b_value, b_rest) = get_token(b_token, b_rest)
@ -252,19 +243,19 @@ def compare(a_version: str, b_version: str, fuzzy: bool = False) -> int:
# Leading version components and their values are equal, now the # Leading version components and their values are equal, now the
# non-terminating version is greater unless it's a suffix # non-terminating version is greater unless it's a suffix
# indicating pre-release # indicating pre-release
if a_token == "suffix": if a_token == Token.SUFFIX:
(a_token, a_value, a_rest) = get_token(a_token, a_rest) (a_token, a_value, a_rest) = get_token(a_token, a_rest)
if a_value < 0: if a_value < 0:
return -1 return -1
if b_token == "suffix": if b_token == Token.SUFFIX:
(b_token, b_value, b_rest) = get_token(b_token, b_rest) (b_token, b_value, b_rest) = get_token(b_token, b_rest)
if b_value < 0: if b_value < 0:
return 1 return 1
# Compare the token value (e.g. digit < letter) # Compare the token value (e.g. digit < letter)
if token_value(a_token) > token_value(b_token): if a_token > b_token:
return -1 return -1
if token_value(a_token) < token_value(b_token): if a_token < b_token:
return 1 return 1
# The tokens are not the same, but previous checks revealed that it # The tokens are not the same, but previous checks revealed that it