Skip to content

Commit 1ec0b95

Browse files
Support parsing metadata filenames for purls (#36)
Supports parsing the filenames of .dsc, copyright and changelog files, typically present in Debian package/metadata archives, to extract the package name and version. Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 0c20724 commit 1ec0b95

File tree

2 files changed

+41
-2
lines changed

2 files changed

+41
-2
lines changed

src/debian_inspector/package.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ def from_filename(cls, filename):
9393
return cls(
9494
name=name,
9595
version=version,
96-
original_filename=filename)
96+
original_filename=filename
97+
)
9798

9899
def to_dict(self):
99100
data = {}
@@ -110,16 +111,36 @@ def to_tuple(self):
110111
return tuple(v for v in self.to_dict().values() if v != 'original_filename')
111112

112113

114+
@attrs
115+
class CodeMetadata(CodeArchive):
116+
"""
117+
A .dsc, copyright or changelog file that is present in the Debian
118+
package/metadata archive and contains package information
119+
in the filename or in the file contents.
120+
121+
For example in ./changelogs/main/d/diffutils/ there are
122+
files such as:
123+
- diffutils_3.7-5_copyright
124+
- diffutils_3.7-5_changelog
125+
And in ./pool/main/b/base-files/ there are files such as:
126+
- base-files_11.1+deb11u8.dsc
127+
"""
128+
129+
113130
def get_nva(filename):
114131
"""
115132
Return a tuple of (name string, Version object, architecture string or
116133
None) parsed from the `filename` of .deb, .udeb, .orig or .debian archive.
117134
"""
118135
is_known = False
119-
if filename.endswith(('.deb', '.udeb')):
136+
if filename.endswith(('.deb', '.udeb', '.dsc')):
120137
basename, _extension = path.splitext(filename)
121138
is_known = True
122139

140+
elif filename.endswith(('_changelog', '_copyright')):
141+
basename, _, _ = filename.rpartition("_")
142+
is_known = True
143+
123144
elif filename.endswith(('.tar.gz', '.tar.xz', '.tar.bz2', '.tar.lzma')):
124145
# A Format: 3.0 archive.
125146
# Note that we ignore the legacy .diff.gz files for Format: 1.0

tests/test_package.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,24 @@ def test_CodeArchive_from_filename(self):
121121
original_filename=fn)
122122
assert debarch == expected
123123

124+
def test_CodeMetadata_from_filename_dsc(self):
125+
fn = 'base-files_11.1+deb11u8.dsc'
126+
debarch = package.CodeMetadata.from_filename(fn)
127+
expected = package.CodeMetadata(
128+
name='base-files',
129+
version=version.Version(epoch=0, upstream='11.1+deb11u8', revision='0'),
130+
original_filename=fn)
131+
assert debarch == expected
132+
133+
def test_CodeMetadata_from_filename_copyright(self):
134+
fn = 'bash_4.1-3+deb6u2_copyright'
135+
debarch = package.CodeMetadata.from_filename(fn)
136+
expected = package.CodeMetadata(
137+
name='bash',
138+
version=version.Version(epoch=0, upstream='4.1', revision='3+deb6u2'),
139+
original_filename=fn)
140+
assert debarch == expected
141+
124142
def test_CodeArchive_from_filename_supports_tar_gz_bz2_and_xz(self):
125143
package.CodeArchive.from_filename('python2.7_2.7.3-0ubuntu3.4.orig.tar.gz')
126144
package.CodeArchive.from_filename('python2.7_2.7.3-0ubuntu3.4.debian.tar.gz')

0 commit comments

Comments
 (0)