From b82b3880b0049e17de84c8b533a6923e124581eb Mon Sep 17 00:00:00 2001 From: Ayush Goyal <36241930+agl29@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:28:22 +0530 Subject: [PATCH] [abfs] Fix download by reading file correctly instead of extra 1 byte always with every chunk (#3472) - When reading a file, we were sending the wrong endByte value in the Range header; the correct format is described at https://learn.microsoft.com/en-us/rest/api/storageservices/specifying-the-range-header-for-blob-service-operations#format-2-bytesstartbyte-endbyte. - We were requesting 1 extra byte for every chunk (the chunk size is 1 MB); for example, for a 1024-byte read at offset 0 we were sending bytes=0-1024 instead of bytes=0-1023. This corrupted the downloaded file and led to the trailing bytes being truncated to fit the download size. In the header headers['range'] = 'bytes=x-y', x is the start byte and y is the end byte, and both are inclusive; hence we subtract 1 from y to make sure we do not duplicate bytes. - We checked, and this issue affects only ABFS; the other filesystems in Hue should not show this behaviour. --- desktop/libs/azure/src/azure/abfs/abfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desktop/libs/azure/src/azure/abfs/abfs.py b/desktop/libs/azure/src/azure/abfs/abfs.py index 25a2328c825..9f38a4e2850 100644 --- a/desktop/libs/azure/src/azure/abfs/abfs.py +++ b/desktop/libs/azure/src/azure/abfs/abfs.py @@ -409,7 +409,7 @@ def read(self, path, offset='0', length=0, *args, **kwargs): path = Init_ABFS.strip_scheme(path) headers = self._getheaders() if length != 0 and length != '0': - headers['range'] = 'bytes=%s-%s' % (str(offset), str(int(offset) + int(length))) + headers['range'] = 'bytes=%s-%s' % (str(offset), str(int(offset) + int(length) - 1)) return self._root.get(path, headers=headers)