From b832b5da48118bc0734a538aff56e85b26b12955 Mon Sep 17 00:00:00 2001 From: wschuell Date: Wed, 27 Nov 2024 04:04:34 +0100 Subject: [PATCH 1/8] maybe_string dealing with non-unicode strings --- pygit2/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pygit2/utils.py b/pygit2/utils.py index 0660cc42..bccfe35b 100644 --- a/pygit2/utils.py +++ b/pygit2/utils.py @@ -34,8 +34,10 @@ def maybe_string(ptr): if not ptr: return None - return ffi.string(ptr).decode('utf8') - + try: + return ffi.string(ptr).decode('utf8') + except UnicodeDecodeError: + return ffi.string(ptr) def to_bytes(s, encoding='utf-8', errors='strict'): if s == ffi.NULL or s is None: From af8e73d76e4225bef2812b4544b0b2f893482745 Mon Sep 17 00:00:00 2001 From: William Schueller Date: Wed, 27 Nov 2024 12:29:23 +0100 Subject: [PATCH 2/8] Unit test for non unicode branch name --- pygit2/utils.py | 30 +++++++++++------------ test/test_nonunicode.py | 53 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 16 deletions(-) create mode 100644 test/test_nonunicode.py diff --git a/pygit2/utils.py b/pygit2/utils.py index bccfe35b..b0732e7a 100644 --- a/pygit2/utils.py +++ b/pygit2/utils.py @@ -34,16 +34,14 @@ def maybe_string(ptr): if not ptr: return None - try: - return ffi.string(ptr).decode('utf8') - except UnicodeDecodeError: - return ffi.string(ptr) + return ffi.string(ptr).decode("utf8", errors="surrogateescape") + -def to_bytes(s, encoding='utf-8', errors='strict'): +def to_bytes(s, encoding="utf-8", errors="strict"): if s == ffi.NULL or s is None: return ffi.NULL - if hasattr(s, '__fspath__'): + if hasattr(s, "__fspath__"): s = os.fspath(s) if isinstance(s, bytes): @@ -53,7 +51,7 @@ def to_bytes(s, encoding='utf-8', errors='strict'): def to_str(s): - if hasattr(s, '__fspath__'): + if hasattr(s, "__fspath__"): s = os.fspath(s) if type(s) is str: @@ -71,13 +69,13 @@ def ptr_to_bytes(ptr_cdata): to a byte buffer containing the address that the pointer refers to. """ - pp = ffi.new('void **', ptr_cdata) + pp = ffi.new("void **", ptr_cdata) return bytes(ffi.buffer(pp)[:]) @contextlib.contextmanager def new_git_strarray(): - strarray = ffi.new('git_strarray *') + strarray = ffi.new("git_strarray *") yield strarray C.git_strarray_dispose(strarray) @@ -90,7 +88,7 @@ def strarray_to_strings(arr): calling this function. """ try: - return [ffi.string(arr.strings[i]).decode('utf-8') for i in range(arr.count)] + return [ffi.string(arr.strings[i]).decode("utf-8") for i in range(arr.count)] finally: C.git_strarray_dispose(arr) @@ -122,19 +120,19 @@ def __init__(self, l): return if not isinstance(l, (list, tuple)): - raise TypeError('Value must be a list') + raise TypeError("Value must be a list") strings = [None] * len(l) for i in range(len(l)): li = l[i] - if not isinstance(li, str) and not hasattr(li, '__fspath__'): - raise TypeError('Value must be a string or PathLike object') + if not isinstance(li, str) and not hasattr(li, "__fspath__"): + raise TypeError("Value must be a string or PathLike object") - strings[i] = ffi.new('char []', to_bytes(li)) + strings[i] = ffi.new("char []", to_bytes(li)) - self.__arr = ffi.new('char *[]', strings) + self.__arr = ffi.new("char *[]", strings) self.__strings = strings - self.__array = ffi.new('git_strarray *', [self.__arr, len(strings)]) + self.__array = ffi.new("git_strarray *", [self.__arr, len(strings)]) def __enter__(self): return self diff --git a/test/test_nonunicode.py b/test/test_nonunicode.py new file mode 100644 index 00000000..ae53f58a --- /dev/null +++ b/test/test_nonunicode.py @@ -0,0 +1,53 @@ +# Copyright 2010-2024 The pygit2 contributors +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# In addition to the permissions in the GNU General Public License, +# the authors give you unlimited permission to link the compiled +# version of this file into combinations with other programs, +# and to distribute those combinations without any restriction +# coming from the use of this file. (The General Public License +# restrictions do apply in other respects; for example, they cover +# modification of the file, and distribution when not linked into +# a combined executable.) +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to +# the Free Software Foundation, 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. + +"""Tests for reference objects.""" + +from pathlib import Path + +import pygit2 +import pytest +import subprocess +import os + +bstring_list = [b"\xc3master"] + + +@pytest.fixture(params=bstring_list) +def bstring(request): + return request.param + + +def test_nonunicode_branchname(testrepo, bstring): + cmd = b"git checkout -b " + bstring + subprocess.check_output(cmd.split(b" "), cwd=testrepo.workdir) + newrepo = pygit2.clone_repository( + testrepo.workdir, + os.path.join(os.path.dirname(testrepo.workdir), "test_nonunicode_repo"), + ) + assert bstring in [ + branch.encode("utf8", "surrogateescape") + for branch in newrepo.listall_branches() + ] From 5c11ece134e667718269b53ef7ac379db68c5773 Mon Sep 17 00:00:00 2001 From: William Schueller Date: Wed, 27 Nov 2024 15:20:01 +0100 Subject: [PATCH 3/8] test using pygit2 for branch creation instead of subprocess call, but fails because of surrogates not allowed --- pygit2/utils.py | 26 +++++++++++++------------- test/test_nonunicode.py | 3 +-- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/pygit2/utils.py b/pygit2/utils.py index b0732e7a..1d5fd67b 100644 --- a/pygit2/utils.py +++ b/pygit2/utils.py @@ -34,14 +34,14 @@ def maybe_string(ptr): if not ptr: return None - return ffi.string(ptr).decode("utf8", errors="surrogateescape") + return ffi.string(ptr).decode('utf8', errors='surrogateescape') -def to_bytes(s, encoding="utf-8", errors="strict"): +def to_bytes(s, encoding='utf-8', errors='strict'): if s == ffi.NULL or s is None: return ffi.NULL - if hasattr(s, "__fspath__"): + if hasattr(s, '__fspath__'): s = os.fspath(s) if isinstance(s, bytes): @@ -51,7 +51,7 @@ def to_bytes(s, encoding="utf-8", errors="strict"): def to_str(s): - if hasattr(s, "__fspath__"): + if hasattr(s, '__fspath__'): s = os.fspath(s) if type(s) is str: @@ -69,13 +69,13 @@ def ptr_to_bytes(ptr_cdata): to a byte buffer containing the address that the pointer refers to. """ - pp = ffi.new("void **", ptr_cdata) + pp = ffi.new('void **', ptr_cdata) return bytes(ffi.buffer(pp)[:]) @contextlib.contextmanager def new_git_strarray(): - strarray = ffi.new("git_strarray *") + strarray = ffi.new('git_strarray *') yield strarray C.git_strarray_dispose(strarray) @@ -88,7 +88,7 @@ def strarray_to_strings(arr): calling this function. """ try: - return [ffi.string(arr.strings[i]).decode("utf-8") for i in range(arr.count)] + return [ffi.string(arr.strings[i]).decode('utf-8') for i in range(arr.count)] finally: C.git_strarray_dispose(arr) @@ -120,19 +120,19 @@ def __init__(self, l): return if not isinstance(l, (list, tuple)): - raise TypeError("Value must be a list") + raise TypeError('Value must be a list') strings = [None] * len(l) for i in range(len(l)): li = l[i] - if not isinstance(li, str) and not hasattr(li, "__fspath__"): - raise TypeError("Value must be a string or PathLike object") + if not isinstance(li, str) and not hasattr(li, '__fspath__'): + raise TypeError('Value must be a string or PathLike object') - strings[i] = ffi.new("char []", to_bytes(li)) + strings[i] = ffi.new('char []', to_bytes(li)) - self.__arr = ffi.new("char *[]", strings) + self.__arr = ffi.new('char *[]', strings) self.__strings = strings - self.__array = ffi.new("git_strarray *", [self.__arr, len(strings)]) + self.__array = ffi.new('git_strarray *', [self.__arr, len(strings)]) def __enter__(self): return self diff --git a/test/test_nonunicode.py b/test/test_nonunicode.py index ae53f58a..ef26d720 100644 --- a/test/test_nonunicode.py +++ b/test/test_nonunicode.py @@ -41,8 +41,7 @@ def bstring(request): def test_nonunicode_branchname(testrepo, bstring): - cmd = b"git checkout -b " + bstring - subprocess.check_output(cmd.split(b" "), cwd=testrepo.workdir) + testrepo.branches.local.create(bstring.decode("utf8", errors="surrogateescape"),commit=testrepo.head.target) newrepo = pygit2.clone_repository( testrepo.workdir, os.path.join(os.path.dirname(testrepo.workdir), "test_nonunicode_repo"), From 32568133533497b24935f17d64db0ae39715125c Mon Sep 17 00:00:00 2001 From: William Schueller Date: Wed, 27 Nov 2024 15:33:33 +0100 Subject: [PATCH 4/8] reverting to subprocess using str not bytes --- test/test_nonunicode.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_nonunicode.py b/test/test_nonunicode.py index ef26d720..18d21887 100644 --- a/test/test_nonunicode.py +++ b/test/test_nonunicode.py @@ -41,7 +41,8 @@ def bstring(request): def test_nonunicode_branchname(testrepo, bstring): - testrepo.branches.local.create(bstring.decode("utf8", errors="surrogateescape"),commit=testrepo.head.target) + cmd = b"git checkout -b " + bstring + subprocess.check_output(cmd.decode('utf8',errors='surrogateescape').split(" "), cwd=testrepo.workdir) newrepo = pygit2.clone_repository( testrepo.workdir, os.path.join(os.path.dirname(testrepo.workdir), "test_nonunicode_repo"), From e6a35c5457c80c4cbdd733ee0f8a1a7598797e09 Mon Sep 17 00:00:00 2001 From: William Schueller Date: Wed, 27 Nov 2024 22:08:07 +0100 Subject: [PATCH 5/8] update test with cloning of existing repo rather than creating new branch --- test/test_nonunicode.py | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/test/test_nonunicode.py b/test/test_nonunicode.py index 18d21887..5094f618 100644 --- a/test/test_nonunicode.py +++ b/test/test_nonunicode.py @@ -23,31 +23,24 @@ # the Free Software Foundation, 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301, USA. -"""Tests for reference objects.""" - -from pathlib import Path +"""Tests for non unicode byte strings""" import pygit2 -import pytest -import subprocess import os +import shutil -bstring_list = [b"\xc3master"] - - -@pytest.fixture(params=bstring_list) -def bstring(request): - return request.param +bstring = b'\xc3master' -def test_nonunicode_branchname(testrepo, bstring): - cmd = b"git checkout -b " + bstring - subprocess.check_output(cmd.decode('utf8',errors='surrogateescape').split(" "), cwd=testrepo.workdir) +def test_nonunicode_branchname(testrepo): + folderpath = 'temp_repo_nonutf' + if os.path.exists(folderpath): + shutil.rmdir(folderpath) newrepo = pygit2.clone_repository( - testrepo.workdir, - os.path.join(os.path.dirname(testrepo.workdir), "test_nonunicode_repo"), - ) + path=folderpath, + url='https://github.com/pygit2/test_branch_notutf.git' + ) assert bstring in [ - branch.encode("utf8", "surrogateescape") - for branch in newrepo.listall_branches() - ] + (ref.split('/')[-1]).encode('utf8', 'surrogateescape') + for ref in newrepo.listall_references() + ] # Remote branch among references: 'refs/remotes/origin/\udcc3master' From 44ef3602bb74edb5da4713de5ca2926589cd8ad5 Mon Sep 17 00:00:00 2001 From: William Schueller Date: Wed, 27 Nov 2024 22:12:29 +0100 Subject: [PATCH 6/8] Typo in shutil call --- test/test_nonunicode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_nonunicode.py b/test/test_nonunicode.py index 5094f618..6d723662 100644 --- a/test/test_nonunicode.py +++ b/test/test_nonunicode.py @@ -35,7 +35,7 @@ def test_nonunicode_branchname(testrepo): folderpath = 'temp_repo_nonutf' if os.path.exists(folderpath): - shutil.rmdir(folderpath) + shutil.rmtree(folderpath) newrepo = pygit2.clone_repository( path=folderpath, url='https://github.com/pygit2/test_branch_notutf.git' From c541064c0edfe2b0521097d87235dbd2e36b67d8 Mon Sep 17 00:00:00 2001 From: William Schueller Date: Thu, 28 Nov 2024 13:51:58 +0100 Subject: [PATCH 7/8] Using surrogateescape for the caught git error message as well --- pygit2/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygit2/errors.py b/pygit2/errors.py index d95bfaa6..58e0cdc7 100644 --- a/pygit2/errors.py +++ b/pygit2/errors.py @@ -42,7 +42,7 @@ def check_error(err, io=False): # Error message giterr = C.git_error_last() if giterr != ffi.NULL: - message = ffi.string(giterr.message).decode('utf8') + message = ffi.string(giterr.message).decode('utf8',errors='surrogateescape') else: message = f'err {err} (no message provided)' From 805e8082abbbd061d2b19e3713c17bd4f991a77d Mon Sep 17 00:00:00 2001 From: William Schueller Date: Tue, 31 Dec 2024 14:00:09 +0100 Subject: [PATCH 8/8] ruff lint --- pygit2/errors.py | 2 +- test/test_nonunicode.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pygit2/errors.py b/pygit2/errors.py index 58e0cdc7..be13dfc4 100644 --- a/pygit2/errors.py +++ b/pygit2/errors.py @@ -42,7 +42,7 @@ def check_error(err, io=False): # Error message giterr = C.git_error_last() if giterr != ffi.NULL: - message = ffi.string(giterr.message).decode('utf8',errors='surrogateescape') + message = ffi.string(giterr.message).decode('utf8', errors='surrogateescape') else: message = f'err {err} (no message provided)' diff --git a/test/test_nonunicode.py b/test/test_nonunicode.py index 6d723662..06dfc0fc 100644 --- a/test/test_nonunicode.py +++ b/test/test_nonunicode.py @@ -37,10 +37,9 @@ def test_nonunicode_branchname(testrepo): if os.path.exists(folderpath): shutil.rmtree(folderpath) newrepo = pygit2.clone_repository( - path=folderpath, - url='https://github.com/pygit2/test_branch_notutf.git' - ) + path=folderpath, url='https://github.com/pygit2/test_branch_notutf.git' + ) assert bstring in [ (ref.split('/')[-1]).encode('utf8', 'surrogateescape') for ref in newrepo.listall_references() - ] # Remote branch among references: 'refs/remotes/origin/\udcc3master' + ] # Remote branch among references: 'refs/remotes/origin/\udcc3master'