"""Gradio app that duplicates Hugging Face repositories.

Two workflows are exposed as tabs: duplicating a single repository, and
duplicating every model, dataset and space of one organization into another.
Errors are rendered as Markdown, with the access token redacted from any text
shown to the user.
"""

import traceback

import gradio as gr
import requests
from huggingface_hub import HfApi, whoami
from huggingface_hub.utils import build_hf_headers

ENDPOINT = "https://huggingface.co"
# ENDPOINT = "http://localhost:5564"
REPO_TYPES = ["model", "dataset", "space"]


def redact_sensitive_text(text, token=None):
    """Return *text* with every occurrence of *token* replaced by a placeholder.

    Non-string *text* is returned unchanged. The token is compared after
    stripping surrounding whitespace, so a token pasted with a trailing newline
    is still redacted, and a whitespace-only token is ignored instead of
    replacing ordinary whitespace in *text*.
    """
    if not isinstance(text, str):
        return text
    if not token:
        return text
    secret = (token if isinstance(token, str) else str(token)).strip()
    if not secret:
        # Nothing meaningful to redact; replacing "" or " " would garble text.
        return text
    return text.replace(secret, "[REDACTED_TOKEN]")


def truncate_text(text, max_length=2000):
    """Return *text* cut to *max_length* characters, marking any truncation."""
    if len(text) <= max_length:
        return text
    return f"{text[:max_length]}... (truncated)"


def extract_response_error_text(response, token=None):
    """Extract a short, redacted error description from an HTTP response.

    The body is parsed as JSON when possible; for a JSON object the first of
    the keys ``error``, ``message``, ``detail`` or ``errors`` that is present
    is used. Otherwise the raw response text is used. The result is redacted
    and truncated.
    """
    try:
        payload = response.json()
    except ValueError:
        payload = response.text

    if isinstance(payload, dict):
        for key in ("error", "message", "detail", "errors"):
            if key in payload:
                payload = payload[key]
                break

    message = str(payload).strip() if payload is not None else ""
    message = redact_sensitive_text(message, token=token)
    return truncate_text(message)


def build_error_details(error, token=None):
    """Describe *error* as Markdown bullet lines plus an optional response body.

    Returns:
        A ``(details_markdown, response_body)`` tuple. ``response_body`` is
        ``None`` unless the exception carries a ``response`` attribute.
    """
    error_type = type(error).__name__
    message = str(error).strip()
    if not message:
        message = "No details were provided by the exception."
    message = redact_sensitive_text(message, token=token)

    lines = [f"- Type: `{error_type}`", f"- Message: `{message}`"]

    response = getattr(error, "response", None)
    response_body = None
    if response is not None:
        lines.append(f"- HTTP status: `{response.status_code}`")
        request_id = response.headers.get("x-request-id")
        if request_id:
            lines.append(f"- Request ID: `{request_id}`")
        response_body = extract_response_error_text(response, token=token)

    # Prefer the explicit cause (``raise ... from``) over the implicit context.
    cause = getattr(error, "__cause__", None) or getattr(error, "__context__", None)
    if cause:
        cause_message = str(cause).strip() or "No details were provided by the cause."
        cause_message = redact_sensitive_text(cause_message, token=token)
        lines.append(f"- Cause: `{type(cause).__name__}: {cause_message}`")

    return "\n".join(lines), response_body


def format_error(error, token=None, context=None, include_traceback=False):
    """Render *error* as a Markdown report for display in the UI.

    Args:
        error: The exception to describe.
        token: Access token to redact from all rendered text.
        context: Optional sentence describing what was being attempted.
        include_traceback: When true, append the (redacted, truncated)
            traceback in a fenced block.
    """
    details_markdown, response_body = build_error_details(error, token=token)
    context_block = f"\n{context}\n" if context else ""

    response_block = ""
    if response_body:
        response_block = f"\n\n#### Response body\n```text\n{response_body}\n```"

    traceback_block = ""
    if include_traceback:
        traceback_text = "".join(
            traceback.format_exception(type(error), error, error.__traceback__)
        ).strip()
        traceback_text = redact_sensitive_text(traceback_text, token=token)
        traceback_text = truncate_text(traceback_text, max_length=8000)
        traceback_block = f"\n\n#### Traceback\n```text\n{traceback_text}\n```"

    return (
        f"### Error"
        f"{context_block}\n"
        f"{details_markdown}\n\n"
        f"Check token permissions and source/destination names, then retry."
        f"{response_block}"
        f"{traceback_block}"
    )


def raise_on_http_error(response, token=None):
    """Raise ``requests.HTTPError`` with a descriptive message if *response* failed.

    The message combines the status code, the reason phrase, the redacted error
    text from the body and the ``x-request-id`` header when present. Does
    nothing when ``response.ok`` is true.
    """
    if response.ok:
        return

    detail = extract_response_error_text(response, token=token)
    reason = response.reason or "HTTP request failed"
    error_message = f"{response.status_code} {reason}"
    if detail:
        error_message = f"{error_message} | {detail}"

    request_id = response.headers.get("x-request-id")
    if request_id:
        error_message = f"{error_message} | request_id={request_id}"

    raise requests.HTTPError(error_message, response=response)


def _require_token(token):
    """Return *token* without surrounding whitespace, or raise if it is empty.

    Raises:
        ValueError: If no token was supplied.
    """
    cleaned = token.strip() if isinstance(token, str) else token
    if not cleaned:
        raise ValueError("a Hugging Face access token is required")
    return cleaned


def repository_exists(repo_id, repo_type, token):
    """Return whether ``repo_id`` of the given type exists on the Hub.

    A 404 response means "does not exist"; any other failing status raises.

    Raises:
        ValueError: If *repo_type* is not one of ``REPO_TYPES``.
        requests.HTTPError: For failing responses other than 404.
    """
    if repo_type not in REPO_TYPES:
        raise ValueError("need to select valid repo type")

    response = requests.get(
        f"{ENDPOINT}/api/{repo_type}s/{repo_id}",
        headers=build_hf_headers(token=token),
        timeout=30,
    )
    if response.status_code == 404:
        return False
    raise_on_http_error(response, token=token)
    return True


def duplicate_repo(source_repo, dst_repo, token, repo_type, private):
    """Ask the Hub to duplicate *source_repo* into *dst_repo*.

    Returns:
        The value of the ``url`` field of the JSON response (``None`` if the
        field is absent).

    Raises:
        ValueError: If *repo_type* is not one of ``REPO_TYPES``.
        requests.HTTPError: If the Hub rejects the request.
    """
    if repo_type not in REPO_TYPES:
        raise ValueError("need to select valid repo type")

    response = requests.post(
        f"{ENDPOINT}/api/{repo_type}s/{source_repo}/duplicate",
        headers=build_hf_headers(token=token),
        json={"repository": dst_repo, "private": private},
        timeout=60,
    )
    raise_on_http_error(response, token=token)
    return response.json().get("url")


def duplicate_single_repo(source_repo, dst_repo, token, repo_type, private):
    """Duplicate one repository and return a Markdown status message.

    The destination is checked first and the duplication is skipped when it
    already exists. Any exception is converted into a Markdown error report
    rather than propagated.
    """
    try:
        # Reject a blank token up front. NOTE(review): an empty string is
        # falsy, and huggingface_hub may then fall back to credentials stored
        # on the host instead of failing - confirm against the installed
        # version.
        token = _require_token(token)
        _ = whoami(token)
        # ^ this will throw if token is invalid

        if repository_exists(dst_repo, repo_type, token):
            return f"Skipped `{source_repo}` because destination `{dst_repo}` already exists."

        repo_url = duplicate_repo(
            source_repo=source_repo,
            dst_repo=dst_repo,
            token=token,
            repo_type=repo_type,
            private=private,
        )
        return (
            f"Duplicated `{source_repo}` to `{dst_repo}`.\n\n"
            f"[Open destination repo]({repo_url})"
        )
    except Exception as e:
        # UI boundary: always return Markdown instead of raising.
        return format_error(
            e,
            token=token,
            context=f"Failed to duplicate `{source_repo}` -> `{dst_repo}`.",
            include_traceback=True,
        )


def list_organization_repositories(source_org, token):
    """List every model, dataset and space authored by *source_org*.

    Returns:
        A list of ``(repo_type, repo_id)`` tuples ordered by the position of
        the type in ``REPO_TYPES`` and then case-insensitively by repo id.
    """
    api = HfApi(endpoint=ENDPOINT)
    repos = []
    for repo_type, list_fn in (
        ("model", api.list_models),
        ("dataset", api.list_datasets),
        ("space", api.list_spaces),
    ):
        for repo_info in list_fn(author=source_org, token=token, limit=None):
            repo_id = getattr(repo_info, "id", None)
            if repo_id:
                repos.append((repo_type, repo_id))

    repos.sort(key=lambda repo: (REPO_TYPES.index(repo[0]), repo[1].lower()))
    return repos


def duplicate_organization(source_org, destination_org, token, private, progress=None):
    """Duplicate every repository of *source_org* into *destination_org*.

    Repositories whose name already exists in the destination (same type,
    case-insensitive) are skipped. Failures for individual repositories are
    collected and reported; they do not abort the run.

    Args:
        source_org: Name of the organization to copy from (no ``/``).
        destination_org: Name of the organization to copy into (no ``/``).
        token: Hugging Face access token.
        private: Whether the duplicated repositories should be private.
        progress: Optional ``gr.Progress``-like callable; one is created when
            omitted.

    Returns:
        A Markdown report, or a Markdown error report if the run as a whole
        failed.
    """
    if progress is None:
        progress = gr.Progress()

    try:
        progress(0.0, desc="Validating input and token")
        source_org = source_org.strip()
        destination_org = destination_org.strip()
        if not source_org or not destination_org:
            raise ValueError("source and destination organizations are required")
        if "/" in source_org or "/" in destination_org:
            raise ValueError(
                "use organization names only (for example: my-org), not repo IDs"
            )

        # Reject a blank token before any request is made with it.
        token = _require_token(token)
        _ = whoami(token)
        # ^ this will throw if token is invalid

        progress(0.05, desc=f"Listing repositories in {source_org}")
        repositories = list_organization_repositories(source_org, token)
        if not repositories:
            return f"No repositories found in `{source_org}`."

        progress(0.1, desc=f"Listing repositories in {destination_org}")
        destination_repositories = list_organization_repositories(destination_org, token)
        # Lower-cased ids so the existence check is case-insensitive.
        destination_repo_index = {
            (repo_type, repo_id.lower()) for repo_type, repo_id in destination_repositories
        }

        listed_repos = "\n".join(
            [f"- `{repo_type}`: `{repo_id}`" for repo_type, repo_id in repositories]
        )

        successes = []
        skipped = []
        failures = []
        total = len(repositories)

        for index, (repo_type, source_repo) in enumerate(repositories, start=1):
            repo_name = source_repo.split("/", maxsplit=1)[-1]
            destination_repo = f"{destination_org}/{repo_name}"
            destination_key = (repo_type, destination_repo.lower())

            if destination_key in destination_repo_index:
                progress(
                    0.1 + (index / total) * 0.85,
                    desc=f"Skipping {index}/{total}: {destination_repo} already exists",
                )
                skipped.append((repo_type, source_repo, destination_repo))
                continue

            progress(
                0.1 + (index / total) * 0.85,
                desc=f"Duplicating {index}/{total}: {source_repo}",
            )
            try:
                duplicate_repo(
                    source_repo=source_repo,
                    dst_repo=destination_repo,
                    token=token,
                    repo_type=repo_type,
                    private=private,
                )
                successes.append((repo_type, source_repo, destination_repo))
                destination_repo_index.add(destination_key)
            except Exception as error:
                # Best effort: record the failure and carry on with the rest.
                failure_summary, response_body = build_error_details(error, token=token)
                if response_body:
                    failure_summary += f"\nResponse body: {response_body}"
                # Strip backticks only after the body is appended, so that
                # nothing inside the fenced block below can close it early.
                failure_summary = failure_summary.replace("`", "")
                failures.append(
                    (repo_type, source_repo, destination_repo, failure_summary)
                )

        progress(1.0, desc="Done")

        success_details = "\n".join(
            [f"- `{repo_type}`: `{src}` -> `{dst}`" for repo_type, src, dst in successes]
        )
        skipped_details = "\n".join(
            [f"- `{repo_type}`: `{src}` -> `{dst}`" for repo_type, src, dst in skipped]
        )
        failure_details = "\n".join(
            [
                f"- `{repo_type}`: `{src}` -> `{dst}`\n```text\n{error}\n```"
                for repo_type, src, dst, error in failures
            ]
        )

        output = f"""
### Repositories in `{source_org}` ({len(repositories)})
{listed_repos}

### Duplication results
- Successes: {len(successes)}
- Skipped (already existed): {len(skipped)}
- Failures: {len(failures)}
"""
        if success_details:
            output += f"\n#### Successful duplicates\n{success_details}\n"
        if skipped_details:
            output += f"\n#### Skipped duplicates\n{skipped_details}\n"
        if failure_details:
            output += f"\n#### Failed duplicates\n{failure_details}\n"
        return output
    except Exception as e:
        # UI boundary: always return Markdown instead of raising.
        return format_error(
            e,
            token=token,
            context=(
                f"Failed while duplicating organization `{source_org}` "
                f"to `{destination_org}`."
            ),
            include_traceback=True,
        )


# NOTE: ``progress=gr.Progress()`` as a default argument is the Gradio idiom for
# receiving a progress tracker in an event handler; it is intentional.
def run_single_duplication(
    source_repo, destination_repo, token, repo_type, private, progress=gr.Progress()
):
    """Gradio handler for the "Single repository" tab; returns Markdown."""
    try:
        source_repo = source_repo.strip()
        destination_repo = destination_repo.strip()
        if not source_repo or not destination_repo:
            return format_error(
                ValueError("source repository and destination repository are required")
            )

        progress(0.0, desc="Duplicating repository")
        return duplicate_single_repo(
            source_repo=source_repo,
            dst_repo=destination_repo,
            token=token,
            repo_type=repo_type,
            private=private,
        )
    except Exception as error:
        return format_error(
            error,
            token=token,
            context="Unexpected failure in single-repository duplication workflow.",
            include_traceback=True,
        )


def run_organization_duplication(
    source_org, destination_org, token, private, progress=gr.Progress()
):
    """Gradio handler for the "Organization" tab; returns Markdown."""
    try:
        return duplicate_organization(
            source_org=source_org,
            destination_org=destination_org,
            token=token,
            private=private,
            progress=progress,
        )
    except Exception as error:
        return format_error(
            error,
            token=token,
            context="Unexpected failure in organization duplication workflow.",
            include_traceback=True,
        )


with gr.Blocks(title="Duplicate Hugging Face repositories") as interface:
    gr.Markdown("# Duplicate Hugging Face repositories")
    gr.Markdown(
        "Use tabs to duplicate a single repo or duplicate all repos from one organization "
        "to another. Get a write token at https://huggingface.co/settings/tokens."
    )

    with gr.Tabs():
        with gr.Tab("Organization"):
            org_source = gr.Textbox(
                label="Source organization",
                placeholder="e.g. my-source-org",
            )
            org_destination = gr.Textbox(
                label="Destination organization",
                placeholder="e.g. my-destination-org",
            )
            org_token = gr.Textbox(label="Write access token", type="password")
            org_private = gr.Checkbox(label="Make duplicated repos private?")
            org_submit = gr.Button("Duplicate all repositories")
            org_output = gr.Markdown(label="output")

            org_submit.click(
                fn=run_organization_duplication,
                inputs=[org_source, org_destination, org_token, org_private],
                outputs=[org_output],
            )

        with gr.Tab("Single repository"):
            single_source = gr.Textbox(
                label="Source repository",
                placeholder="e.g. osanseviero/src",
            )
            single_destination = gr.Textbox(
                label="Destination repository",
                placeholder="e.g. osanseviero/dst",
            )
            single_token = gr.Textbox(label="Write access token", type="password")
            single_repo_type = gr.Dropdown(
                choices=REPO_TYPES,
                value="model",
                label="Repository type",
            )
            single_private = gr.Checkbox(label="Make duplicated repo private?")
            single_submit = gr.Button("Duplicate repository")
            single_output = gr.Markdown(label="output")

            single_submit.click(
                fn=run_single_duplication,
                inputs=[
                    single_source,
                    single_destination,
                    single_token,
                    single_repo_type,
                    single_private,
                ],
                outputs=[single_output],
            )

interface.queue()
interface.launch(show_error=True)