oxen.remote_repo

get_repo

def get_repo(name: str, host: str = "hub.oxen.ai")

Get a RemoteRepo object for the specified name. For example ‘ox/CatDogBBox’.

Arguments:

  • name - str Name of the repository in the format ‘namespace/repo_name’.
  • host - str The host to connect to. Defaults to ‘hub.oxen.ai’

Returns:

RemoteRepo

create_repo

def create_repo(name: str,
                description="",
                is_public: bool = True,
                host: str = "hub.oxen.ai",
                scheme: str = "https",
                files: List[Tuple[str, str]] = [])

Create a new repository on the remote server.

Arguments:

  • name - str Name of the repository in the format ‘namespace/repo_name’.
  • description - str Description of the repository. Only applicable to OxenHub.
  • is_public - bool Whether the repository is public or private. Only applicable to OxenHub.
  • host - str The host to connect to. Defaults to ‘hub.oxen.ai’
  • scheme - str The scheme to use for the remote url. Default: ‘https’
  • files - List[Tuple[str, str]] A list of tuples containing the path to the file and the contents of the file that you would like to seed the repository with.

Returns:

RemoteRepo

RemoteRepo Objects

class RemoteRepo()

The RemoteRepo class allows you to interact with an Oxen repository without downloading the data locally.

Examples

Add & Commit Files

Adding and committing a file to a remote workspace.

from oxen import RemoteRepo

repo = RemoteRepo("ox/CatDogBBox")
repo.add("/path/to/image.png")
status = repo.status()
print(status.added_files())
repo.commit("Adding my image to the remote workspace.")

Downloading Specific Files

Grab a specific file revision and load it into pandas.

from oxen import RemoteRepo
import pandas as pd

# Connect to the remote repo
repo = RemoteRepo("ox/CatDogBBox")
# Specify the version of the file you want to download
branch = repo.get_branch("my-pets")
# Download takes a file or directory and a commit id
repo.download("annotations", revision=branch.commit_id)
# Once you have the data locally, use whatever library you want to explore the data
df = pd.read_csv("annotations/train.csv")
print(df.head())

__init__

def __init__(path: str,
             host: Optional[str] = None,
             revision: str = "main",
             scheme="https")

Create a new RemoteRepo object to interact with.

Arguments:

  • path - str Name of the repository in the format ‘namespace/repo_name’. For example ‘ox/chatbot’
  • host - str The host to connect to. Defaults to ‘hub.oxen.ai’
  • revision - str The branch name or commit id to checkout. Defaults to ‘main’
  • scheme - str The scheme to use for the remote url. Default: ‘https’

create

def create(empty: bool = False, is_public: bool = False)

Will create the repo on the remote server.

Arguments:

  • empty - bool Whether to create an empty repo or not. Default: False
  • is_public - bool Whether the repository is public or private. Default: False

exists

def exists() -> bool

Checks if this remote repo exists on the server.

delete

def delete()

Delete this remote repo from the server.

checkout

def checkout(revision: str, create=False)

Switches the remote repo to the specified revision.

Arguments:

  • revision - str The name of the branch or commit id to checkout.
  • create - bool Whether to create a new branch if it doesn’t exist. Default: False

ls

def ls(directory: Optional[str] = None,
       page_num: int = 1,
       page_size: int = 100)

Lists the contents of a directory in the remote repo.

Arguments:

  • directory - str The directory to list. If None, will list the root directory.
  • page_num - int The page number to return. Default: 1
  • page_size - int The number of items to return per page. Default: 100

download

def download(src: str,
             dst: Optional[str] = None,
             revision: Optional[str] = None)

Download a file or directory from the remote repo.

Arguments:

  • src - str The path to the remote file or directory
  • dst - str | None The path to the local file. If None, will download to the same path as src
  • revision - str | None The branch or commit id to download. Defaults to self.revision

add

def add(local_path: str, directory: str = "")

Stage a file to the remote workspace

Arguments:

  • local_path - str The path to the local file to be staged
  • directory - str The path in the remote repo where the file will be added

remove

def remove(path: str)

Unstage a file from the remote workspace

Arguments:

  • path - str The path to the file on remote to be removed from staging

restore_df

def restore_df(path: str)

Unstage any changes to the schema or contents of a dataframe file on the remote repo

Arguments:

  • path - str The path to the df on the remote to be restored

status

def status(path: str = "")

Get the status of the remote repo. Returns a StagedData object.

Arguments:

  • path - str The directory or file path on the remote that will be checked for modifications

commit

def commit(message: str)

Commit the staged data in the remote repo with a message.

Arguments:

  • message - str The commit message.

log

def log()

Get the commit history for a remote repo

branches

def branches()

List all branches for a remote repo

get_df_size

def get_df_size(path: str)

Get the size of a dataframe file on the remote repo

Arguments:

  • path - str The path to the df on the remote

get_df_row

def get_df_row(path: str, idx: int)

Fetches a row from the dataframe at the specified path on the remote repo

Arguments:

  • path - str Path to the dataframe on the remote repo
  • idx - int The index of the row to return

get_df_slice

def get_df_slice(path: str,
                 start: int,
                 end: int,
                 columns: Optional[List[str]] = None)

Gets a slice of rows from the dataframe at the specified path on the remote repo

Arguments:

  • path - str Path to the dataframe on the remote repo
  • start - int The start index of the data frame to return
  • end - int The end index of the data frame to return
  • columns - List[str] A list of column names to return. If None, will return all columns.

add_df_row

def add_df_row(path: str, row: dict)

Adds a row to the dataframe at the specified path on the remote repo

Arguments:

  • path - str Path to the dataframe on the remote repo
  • row - dict A dictionary representing the row to be added to the dataframe, where keys are column names and values are the values to be inserted. Schema must exactly match DF on remote repo.

get_branch

def get_branch(branch: str)

Return a branch by name on this repo, if it exists

Arguments:

  • branch - str The name of the branch to return

create_branch

def create_branch(branch: str)

Return a branch by name on this repo, creating it from the currently checked out branch if it doesn’t exist

Arguments:

  • branch - str The name to assign to the created branch

create_checkout_branch

def create_checkout_branch(branch: str)

Create a new branch from the currently checked out branch, and switch to it

Arguments:

  • branch - str The name to assign to the created branch

namespace

@property
def namespace() -> str

The namespace for the repo.

name

@property
def name() -> str

The name of the repo.

url

@property
def url() -> str

The remote url for the repo.

revision

@property
def revision() -> str

The branch or commit id for the repo