Hi! The link to the GitHub repo seems to return a 404.
Has it been moved to another location?
Are there any working examples of chunked uploads with the new generated Python SDK (box_sdk_gen)? I tried to get it working, but it seems much more complicated now, and the documentation is a bit sparse on that topic. The simple upload function works fine.
Here's my code (these methods are in a class holding an authenticated client, which they access via "self.client"); it breaks at "self.client.chunked_uploads.upload_file_part":
# imports used below (the surrounding class and our authentication_required
# decorator are omitted); exact import paths may vary by SDK version
import base64
import hashlib
import os
import pathlib

from box_sdk_gen import File, generate_byte_stream_from_buffer


def calculate_digest_for_chunk(self, chunk_data):
    # Box expects digests as "sha=" + base64 of the raw SHA1 bytes
    sha1_hash = hashlib.sha1(chunk_data).digest()
    base64_encoded = base64.b64encode(sha1_hash).decode("ascii")
    return f"sha={base64_encoded}"
def calculate_file_digest(self, file_path):
    """
    Calculate the SHA1 digest of an entire file, base64 encoded.

    Args:
        file_path: Path to the file
    Returns:
        String in the format "sha=BASE64_ENCODED_DIGEST"
    """
    sha1 = hashlib.sha1()
    # Read the file in 1MB chunks so large files don't have to fit in memory
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            sha1.update(chunk)
    # Base64-encode the raw digest and prefix it as Box expects
    base64_encoded = base64.b64encode(sha1.digest()).decode("ascii")
    return f"sha={base64_encoded}"
@authentication_required
def upload_large_file(
    self,
    file_path: pathlib.Path,
    parent_folder_id: str,
    show_progress: bool = False,
) -> File:
    """
    Upload a large file (50MB or larger) to Box using a chunked upload session.

    Args:
        file_path: Path object to the file to upload
        parent_folder_id: ID of the folder to upload to
        show_progress: Whether to print progress information
    Returns:
        File object representing the uploaded file
    """
    file_size = os.path.getsize(file_path)
    file_name = os.path.basename(file_path)

    upload_session = self.client.chunked_uploads.create_file_upload_session(
        parent_folder_id, file_size, file_name
    )
    print("upload_session: ", upload_session)

    part_size = upload_session.part_size
    total_parts = upload_session.total_parts

    # Upload the file part by part
    uploaded_parts = []
    with open(file_path, "rb") as file_stream:
        for part_number in range(total_parts):
            print("part_number: ", part_number)
            # Seek to the correct position for this part and read one chunk
            file_stream.seek(part_number * part_size)
            chunk = file_stream.read(part_size)
            if not chunk:
                break

            # Inclusive byte range of this part; the last byte index is file_size - 1
            content_range_lower = part_number * part_size
            content_range_higher = min(
                content_range_lower + part_size - 1, file_size - 1
            )
            content_range = (
                f"{content_range_lower}-{content_range_higher}/{file_size}"
            )

            digest = self.calculate_digest_for_chunk(chunk)
            print(digest)
            if show_progress:
                print(
                    f"Uploading part {part_number + 1}/{total_parts}, "
                    f"Range: {content_range}, "
                    f"Upload-Session-ID: {upload_session.id}"
                )

            byte_stream = generate_byte_stream_from_buffer(chunk)
            # This is the call that raises the exception below
            uploaded_part = self.client.chunked_uploads.upload_file_part(
                upload_session.id,
                byte_stream,
                digest,
                content_range,
            )
            uploaded_parts.append(uploaded_part)

    # Commit the upload session with the collected parts and the whole-file digest
    digest = self.calculate_file_digest(file_path)
    status = self.client.chunked_uploads.create_file_upload_session_commit(
        upload_session.id, uploaded_parts, digest
    )
    print(status)
    return status
I always get an exception there:
Error
Traceback (most recent call last):
Failure: builtins.tuple: (<class 'box_sdk_gen.box.errors.BoxSDKError'>, BoxSDKError("HTTPSConnectionPool(host='upload.box.com', port=443): Max retries exceeded with url: /api/2.0/files/upload_sessions/6D4346B96FE3EE3930FB99D255204E1D (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2417)')))"), <traceback object at 0x000002581109C740>)
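For completeness: the generated SDK's reference also seems to list a one-shot helper, chunked_uploads.upload_big_file, that wraps the whole session/part/commit flow. I haven't verified the exact signature, so the sketch below is only my assumption from reading the reference (argument names and order may be off):

import os
from box_sdk_gen import BoxClient

def upload_with_helper(client: BoxClient, file_path: str, parent_folder_id: str):
    # assumption: upload_big_file(file, file_name, file_size, parent_folder_id)
    file_size = os.path.getsize(file_path)
    with open(file_path, "rb") as f:
        return client.chunked_uploads.upload_big_file(
            f,
            os.path.basename(file_path),
            file_size,
            parent_folder_id,
        )

If that helper is the intended way to do chunked uploads with the new SDK, a pointer to a working example of it would already answer my question.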