From 85db1d775c1c4294bd1dc26a174609b32138fb1a Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Fri, 11 Oct 2024 17:44:38 +1300 Subject: [PATCH 01/31] feat(experiment): add JuliaHub._project_datasets --- src/JuliaHub.jl | 1 + src/datasets.jl | 9 +++++++-- src/projects.jl | 29 +++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 src/projects.jl diff --git a/src/JuliaHub.jl b/src/JuliaHub.jl index b371d3541..276e6a64c 100644 --- a/src/JuliaHub.jl +++ b/src/JuliaHub.jl @@ -30,6 +30,7 @@ include("jobs/request.jl") include("jobs/logging.jl") include("jobs/logging-kafka.jl") include("jobs/logging-legacy.jl") +include("projects.jl") function __init__() # We'll only attempt to determine the local timezone once, when the package loads, diff --git a/src/datasets.jl b/src/datasets.jl index 160340d8c..eb56a2868 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -320,6 +320,11 @@ function datasets( JuliaHubError("Error while retrieving datasets from the server", e, catch_backtrace()) ) end + # Note: unless `shared` is `true`, we filter down to the datasets owned by `username`. + return _parse_dataset_list(datasets; username=shared ? nothing : username) +end + +function _parse_dataset_list(datasets::Vector; username::Union{AbstractString, Nothing}=nothing)::Vector{Dataset} # It might happen that some of the elements of the `datasets` array can not be parsed for some reason, # and the Dataset() constructor will throw. Rather than having `datasets` throw an error (as we would # normally do for invalid backend responses), in this case we handle the situation more gracefully, @@ -331,8 +336,8 @@ function datasets( datasets = map(datasets) do dataset try # We also use the `nothing` method for filtering out datasets that are not owned by the - # current `username` if `shared = false`. - if !shared && (dataset["owner"]["username"] != username) + # current `username`. If `username = nothing`, no filtering is done. 
+ if !isnothing(username) && (dataset["owner"]["username"] != username) return nothing end return Dataset(dataset) diff --git a/src/projects.jl b/src/projects.jl new file mode 100644 index 000000000..b24d1caf1 --- /dev/null +++ b/src/projects.jl @@ -0,0 +1,29 @@ +# Experimental function to fetch datasets associated with a project. +# +#= +using JuliaHub +auth = JuliaHub.authenticate("") +ds = JuliaHub._project_datasets(auth, "") +=# + +function _project_datasets(auth::Authentication, project_uuid::AbstractString) + project_uuid = tryparse(UUIDs.UUID, project_uuid) + if isnothing(project_uuid) + throw(ArgumentError("project_uuid must be a UUID, got '$(project_uuid)'")) + end + return _project_datasets(auth, project_uuid) +end + +function _project_datasets(auth::Authentication, project_uuid::UUIDs.UUID) + r = JuliaHub._restcall( + auth, :GET, ("datasets",), nothing; + query = (; project = string(project_uuid)) + ) + if r.status == 400 + throw(InvalidRequestError("Unable to fetch datasets for project '$(project_uuid)' ($(r.body))")) + elseif r.status != 200 + JuliaHub._throw_invalidresponse(r; msg="Unable to fetch datasets.") + end + datasets, _ = JuliaHub._parse_response_json(r, Vector) + return _parse_dataset_list(datasets) +end From 878be78f72c5508eaf78d98e873aa7e79a83684f Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Fri, 28 Feb 2025 22:56:25 +1300 Subject: [PATCH 02/31] upload WIP --- src/JuliaHub.jl | 2 +- src/authentication.jl | 51 +++++++-- src/datasets.jl | 51 +++++---- src/projects.jl | 239 +++++++++++++++++++++++++++++++++++++++--- 4 files changed, 298 insertions(+), 45 deletions(-) diff --git a/src/JuliaHub.jl b/src/JuliaHub.jl index 276e6a64c..9162d924f 100644 --- a/src/JuliaHub.jl +++ b/src/JuliaHub.jl @@ -11,7 +11,7 @@ import SHA import TimeZones import TOML import URIs -import UUIDs +using UUIDs: UUIDs, UUID const _LOCAL_TZ = Ref{Dates.TimeZone}() diff --git a/src/authentication.jl b/src/authentication.jl index 84e42c23c..7c61756b7 100644 --- 
a/src/authentication.jl +++ b/src/authentication.jl @@ -26,6 +26,7 @@ Objects have the following properties: * `server :: URIs.URI`: URL of the JuliaHub instance this authentication token applies to. * `username :: String`: user's JuliaHub username (used for e.g. to namespace datasets) * `token :: JuliaHub.Secret`: a [`Secret`](@ref) object storing the JuliaHub authentication token +* `project_id :: Union{UUID, Nothing}`: the project ID of the currently active project. Note that the object is mutable, and hence will be shared as it is passed around. And at the same time, functions such as [`reauthenticate!`](@ref) may modify the object. @@ -38,6 +39,7 @@ mutable struct Authentication server::URIs.URI username::String token::Secret + project_id::Union{UUIDs.UUID, Nothing} _api_version::VersionNumber _tokenpath::Union{String, Nothing} _email::Union{String, Nothing} @@ -48,6 +50,7 @@ mutable struct Authentication tokenpath::Union{AbstractString, Nothing}=nothing, email::Union{AbstractString, Nothing}=nothing, expires::Union{Integer, Nothing}=nothing, + project_uuid::Union{UUIDs.UUID, Nothing}=nothing, ) # The authentication() function should take care of sanitizing the inputs here, # so it is fine to just error() here. @@ -58,7 +61,7 @@ mutable struct Authentication @warn "Invalid auth.toml token path passed to Authentication, ignoring." 
tokenpath tokenpath = nothing end - new(server, username, token, api_version, tokenpath, email, expires) + new(server, username, token, project_uuid, api_version, tokenpath, email, expires) end end @@ -66,7 +69,11 @@ function Base.show(io::IO, auth::Authentication) print(io, "JuliaHub.Authentication(") print(io, '"', auth.server, "\", ") print(io, '"', auth.username, "\", ") - print(io, "*****)") + print(io, "*****") + if !isnothing(auth.project_id) + print(io, "; project_id = \"", auth.project_id, "\"") + end + print(io, ")") end function _sanitize_juliahub_uri(f::Base.Callable, server::URIs.URI) @@ -211,6 +218,7 @@ function authenticate( force::Bool=false, maxcount::Integer=_DEFAULT_authenticate_maxcount, hook::Union{Base.Callable, Nothing}=nothing, + project::Union{AbstractString, UUIDs.UUID, Nothing, Missing}=missing, ) maxcount >= 1 || throw(ArgumentError("maxcount must be >= 1, got '$maxcount'")) if !isnothing(hook) && !hasmethod(hook, Tuple{AbstractString}) @@ -220,8 +228,9 @@ function authenticate( ), ) end + project_uuid = _normalize_project(project) server_uri = _juliahub_uri(server) - auth = Mocking.@mock _authenticate(server_uri; force, maxcount, hook) + auth = Mocking.@mock _authenticate(server_uri; force, maxcount, hook, project_uuid) global __AUTH__[] = auth return auth end @@ -252,14 +261,16 @@ function _juliahub_uri(server::Union{AbstractString, Nothing}) end function _authenticate( - server_uri::URIs.URI; force::Bool, maxcount::Integer, hook::Union{Base.Callable, Nothing} + server_uri::URIs.URI; + force::Bool, maxcount::Integer, hook::Union{Base.Callable, Nothing}, + project_uuid::Union{UUID, Nothing}, ) isnothing(hook) || PkgAuthentication.register_open_browser_hook(hook) try # _authenticate either returns a valid token, or throws auth_toml = _authenticate_retry(string(server_uri), 1; force, maxcount) # Note: _authentication may throw, which gets passed on to the user - _authentication(server_uri; auth_toml...) 
+ _authentication(server_uri; project_uuid, auth_toml...) finally isnothing(hook) || PkgAuthentication.clear_open_browser_hook() end @@ -332,6 +343,7 @@ function _authentication( email::Union{AbstractString, Nothing}=nothing, username::Union{AbstractString, Nothing}=nothing, tokenpath::Union{AbstractString, Nothing}=nothing, + project_uuid::Union{UUID, Nothing}=nothing, ) # If something goes badly wrong in _get_api_information, it may throw. We won't really # be able to proceed, since we do not know what JuliaHub APIs to use, so we need to @@ -367,10 +379,37 @@ function _authentication( else username = api.username end - return Authentication(server, api.api_version, username, token; email, expires, tokenpath) + return Authentication( + server, api.api_version, username, token; + email, expires, tokenpath, project_uuid, + ) end _authentication(server::AbstractString; kwargs...) = _authentication(URIs.URI(server); kwargs...) +function _normalize_project( + project::Union{AbstractString, UUIDs.UUID, Nothing, Missing} +)::Union{UUID, Nothing} + if ismissing(project) + project = get(ENV, "JULIAHUB_PROJECT_UUID", nothing) + end + if isnothing(project) + return nothing + elseif isa(project, UUIDs.UUID) + return project + elseif isa(project, AbstractString) + project_uuid = tryparse(UUIDs.UUID, project) + if isnothing(project) + throw( + ArgumentError( + "Invalid project_id passed to Authentication() - not a UUID: $(project)" + ), + ) + end + return project_uuid::UUID + end + error("Bug. Unimplemented case.") +end + """ JuliaHub.check_authentication(; [auth::Authentication]) -> Bool diff --git a/src/datasets.jl b/src/datasets.jl index 04c08eb94..c40eeb121 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -324,7 +324,9 @@ function datasets( return _parse_dataset_list(datasets; username=shared ? 
nothing : username) end -function _parse_dataset_list(datasets::Vector; username::Union{AbstractString, Nothing}=nothing)::Vector{Dataset} +function _parse_dataset_list( + datasets::Vector; username::Union{AbstractString, Nothing}=nothing +)::Vector{Dataset} # It might happen that some of the elements of the `datasets` array can not be parsed for some reason, # and the Dataset() constructor will throw. Rather than having `datasets` throw an error (as we would # normally do for invalid backend responses), in this case we handle the situation more gracefully, @@ -587,7 +589,7 @@ function upload_dataset end # # Acquire an upload for the dataset. By this point, the dataset with this name # should definitely exist, although race conditions are always a possibility. - r = _open_dataset_version(dataset_name; auth) + r = _open_dataset_version(auth, dataset_name) if (r.status == 404) && !create # A non-existent dataset if create=false indicates a user error. throw( @@ -599,23 +601,7 @@ function upload_dataset end # Any other 404 or other non-200 response indicates a backend failure _throw_invalidresponse(r) end - upload_config, _ = _parse_response_json(r, Dict) - # Verify that the dtype of the remote dataset is what we expect it to be. - if upload_config["dataset_type"] != dtype - if newly_created_dataset - # If we just created the dataset, then there has been some strange error if dtypes - # do not match. - throw(JuliaHubError("Dataset types do not match.")) - else - # Otherwise, it's a user error (i.e. they are trying to update dataset with the wrong - # dtype). 
- throw( - InvalidRequestError( - "Local data type ($dtype) does not match existing dataset dtype $(upload_config["dataset_type"])" - ), - ) - end - end + upload_config = _check_dataset_upload_config(r, dtype) # Upload the actual data try _upload_dataset(upload_config, local_path; progress) @@ -625,7 +611,7 @@ function upload_dataset end # Finalize the upload try # _close_dataset_version will also throw on non-200 responses - _close_dataset_version(dataset_name, upload_config; local_path, auth) + _close_dataset_version(auth, dataset_name, upload_config; local_path) catch e throw(JuliaHubError("Finalizing upload failed", e, catch_backtrace())) end @@ -640,6 +626,27 @@ function upload_dataset end return dataset((username, dataset_name); auth) end +function _check_dataset_upload_config(r::_RESTResponse, expected_dtype::AbstractString) + upload_config, _ = _parse_response_json(r, Dict) + # Verify that the dtype of the remote dataset is what we expect it to be. + if upload_config["dataset_type"] != expected_dtype + if newly_created_dataset + # If we just created the dataset, then there has been some strange error if dtypes + # do not match. + throw(JuliaHubError("Dataset types do not match.")) + else + # Otherwise, it's a user error (i.e. they are trying to update dataset with the wrong + # dtype). 
+ throw( + InvalidRequestError( + "Local data type ($expected_dtype) does not match existing dataset dtype $(upload_config["dataset_type"])" + ), + ) + end + end + return upload_config +end + function _dataset_dtype(local_path::AbstractString) if isdir(local_path) return "BlobTree" @@ -689,7 +696,7 @@ function _new_dataset( ) end -function _open_dataset_version(name; auth::Authentication=__auth__())::_RESTResponse +function _open_dataset_version(auth::Authentication, name::AbstractString)::_RESTResponse _restcall(auth, :POST, "user", "datasets", name, "versions") end @@ -740,7 +747,7 @@ function _upload_dataset(upload_config, local_path; progress::Bool) end function _close_dataset_version( - name, upload_config; local_path=nothing, auth::Authentication=__auth__() + auth::Authentication, name, upload_config; local_path=nothing )::_RESTResponse body = Dict( "name" => name, diff --git a/src/projects.jl b/src/projects.jl index b24d1caf1..cd4a8cedf 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -1,29 +1,236 @@ -# Experimental function to fetch datasets associated with a project. -# -#= -using JuliaHub -auth = JuliaHub.authenticate("") -ds = JuliaHub._project_datasets(auth, "") -=# - -function _project_datasets(auth::Authentication, project_uuid::AbstractString) - project_uuid = tryparse(UUIDs.UUID, project_uuid) +struct ProjectDataset + project_uuid::UUIDs.UUID + dataset::Dataset + is_writable::Bool +end + +function Base.getproperty(pd::ProjectDataset, name::Symbol) + if name in fieldnames(ProjectDataset) + return getfield(pd, name) + elseif name in propertynames(Dataset) + return getproperty(getfield(pd, :dataset), name) + else + throw(ArgumentError("No property $name for ProjectDataset")) + end +end + + +""" + struct ProjectNotSetError <: JuliaHubException + +Exception thrown when the authentication object is not set to a project, but the +operation is meant to take place in the context of a project. 
+""" +struct ProjectNotSetError <: JuliaHubException end +function Base.showerror(io::IO, e::ProjectNotSetError) + print(io, "ProjectNotSetError: authentication object not associated with a project") +end + +const ProjectReference = Union{AbstractString, UUIDs.UUID} + +# Parses the standard project::Union{ProjectReference, Nothing} we pass to +# project_* function into a project UUID object (or throws the appropriate error). +function _project_uuid(auth::Authentication, project::Union{ProjectReference, Nothing})::UUIDs.UUID + if isnothing(project) + if isnothing(auth.project_id) + throw(ProjectNotSetError()) + else + return auth.project_id + end + elseif isa(project, UUIDs.UUID) + return project + elseif isa(project, AbstractString) + project_uuid = tryparse(UUIDs.UUID, project) + if isnothing(project_uuid) + throw(ArgumentError("`project` must be a UUID, got '$(project)'")) + end + return project_uuid + else + error("Bug. Unimplemented project reference: $(project)::$(typeof(project))") + end +end + +""" +JuliaHub.project_dataset(dataset::DatasetReference; [project::ProjectReference], [auth]) -> Dataset + +Looks up a dataset in the context of a project. +""" +function project_dataset end + +function project_dataset( + dataset::Dataset; + project::Union{ProjectReference, Nothing}, + auth::Authentication=__auth__(), +) + project_uuid = _project_uuid(auth, project) + datasets = _project_datasets(auth, project_uuid) + for project_dataset in datasets + if project_dataset.uuid == dataset.uuid + return project_dataset + end + end + throw( + InvalidRequestError( + "Dataset uuid:$(dataset.uuid) ('$(dataset.username)/$(dataset.dataset_name)') not attached to project '$(project_uuid)'." 
+ ), + ) +end + +function project_dataset( + dsref::_DatasetRefTuple; + project::Union{ProjectReference, Nothing}, + auth::Authentication=__auth__(), +) + username, dataset_name = dsref + project_uuid = _project_uuid(auth, project) + datasets = _project_datasets(auth, project_uuid) + for dataset in datasets + if (dataset.owner == username) && (dataset.name == dataset_name) + return dataset + end + end + throw( + InvalidRequestError( + "Dataset '$(username)/$(dataset_name)' not attached to project '$(project_uuid)'." + ), + ) +end + +function project_dataset( + dataset_name::AbstractString; + project::Union{ProjectReference, Nothing}, + auth::Authentication=__auth__(), +) + return project_dataset((auth.username, dataset_name); project, auth) +end + +""" + JuliaHub.project_datasets([project::Union{AbstractString, UUID}]; [auth::Authentication]) -> Vector{Dataset} + +Returns the list of datasets linked to the given project. +""" +function project_datasets end + +function project_datasets(; auth::Authentication=__auth__()) + project_id = auth.project_id + if isnothing(project_id) + throw(ArgumentError("Not authenticated in the context of a project.")) + end + return _project_datasets(auth, project_id) +end + +function project_datasets(project::AbstractString; auth::Authentication=__auth__()) + project_uuid = tryparse(UUIDs.UUID, project) if isnothing(project_uuid) - throw(ArgumentError("project_uuid must be a UUID, got '$(project_uuid)'")) + throw(ArgumentError("`project` must be a UUID, got '$(project)'")) end - return _project_datasets(auth, project_uuid) + return project_datasets(project_uuid; auth) end -function _project_datasets(auth::Authentication, project_uuid::UUIDs.UUID) +function _project_datasets(auth::Authentication, project::UUIDs.UUID) r = JuliaHub._restcall( auth, :GET, ("datasets",), nothing; - query = (; project = string(project_uuid)) + query=(; project=string(project)), ) if r.status == 400 - throw(InvalidRequestError("Unable to fetch datasets for 
project '$(project_uuid)' ($(r.body))")) + throw( + InvalidRequestError( + "Unable to fetch datasets for project '$(project)' ($(r.body))" + ), + ) elseif r.status != 200 JuliaHub._throw_invalidresponse(r; msg="Unable to fetch datasets.") end datasets, _ = JuliaHub._parse_response_json(r, Vector) - return _parse_dataset_list(datasets) + return map(_parse_dataset_list(datasets)) do dataset + @assert dataset._json["project"]["project_id"] == string(project) + ProjectDataset(project, dataset, dataset._json["project"]["is_writable"]) + end +end + +""" + JuliaHub.upload_project_dataset(dataset::DatasetReference, local_path; [auth,] kwargs...) -> Dataset + +Uploads a new version of a project-linked dataset. + +!!! note "Permissions" + + Note that in order for this to work, you need to have edit rights on the projects and + the dataset needs to have been marked writable by the dataset owner. + +!!! tip + + The function call is functionally equivalent to the following [`upload_dataset`](@ref) call + + ``` + JuliaHub.upload_dataset( + dataset, local_path; + create=false, update=true, replace=false, + ) + ``` + + except that the upload is associated with a project. +""" +function upload_project_dataset end + +function upload_project_dataset( + dataset::Dataset, + local_path::AbstractString; + progress::Bool=true, + project::Union{ProjectReference, Nothing}=nothing, + # Authentication + auth::Authentication=__auth__(), +) + project_uuid = _project_uuid(auth, project) + dtype = _dataset_dtype(local_path) + + # Actually attempt the upload + r = _open_dataset_version(auth, dataset.uuid, project_uuid) + if r.status in (400, 403, 404) + throw( + InvalidRequestError( + "Can't upload :cry:" + ), + ) + elseif r.status != 200 + # Other response codes indicate a backend failure + _throw_invalidresponse(r) + end + # ... 
+ upload_config = _check_dataset_upload_config(r, dtype) + # Upload the actual data + try + _upload_dataset(upload_config, local_path; progress) + catch e + throw(JuliaHubError("Data upload failed", e, catch_backtrace())) + end + # Finalize the upload + try + # _close_dataset_version will also throw on non-200 responses + _close_dataset_version(auth, dataset_name, upload_config; local_path) + catch e + throw(JuliaHubError("Finalizing upload failed", e, catch_backtrace())) + end + # If everything was successful, we'll return an updated DataSet object. + return dataset((username, dataset_name); auth) +end + +function upload_project_dataset( + ::Union{_DatasetRefTuple, AbstractString} +) +end + +# This calls the /datasets/{uuid}/versions?project={uuid} endpoint, +# which is different from /user/datasets/{name}/versions endpoint +# the other method calls. +function _open_dataset_version( + auth::Authentication, dataset_uuid::UUID, project_uuid::UUID +)::_RESTResponse + body = Dict("project" => string(project_uuid)) + return JuliaHub._restcall( + auth, + :POST, + ("datasets", string(dataset_uuid), "versions"), + JSON.json(body), + ) end From 069b44b5848ccf602484af2398f1bcb7e39fff38 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 3 Mar 2025 19:25:55 +1300 Subject: [PATCH 03/31] end-to-end wip --- src/datasets.jl | 5 ++ src/projects.jl | 122 +++++++++++++++++++++++++++++++++++++++-------- test/datasets.jl | 8 ++++ 3 files changed, 116 insertions(+), 19 deletions(-) diff --git a/src/datasets.jl b/src/datasets.jl index c40eeb121..fa9e50a64 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -145,9 +145,14 @@ function Dataset(d::Dict) ) end +function Base.propertynames(::Dataset) + return (:owner, :name, :uuid, :dtype, :size, :versions, :description, :tags) +end + function Base.show(io::IO, d::Dataset) print(io, "JuliaHub.dataset((\"", d.owner, "\", \"", d.name, "\"))") end + function Base.show(io::IO, ::MIME"text/plain", d::Dataset) printstyled(io, "Dataset:"; 
bold=true) print(io, " ", d.name, " (", d.dtype, ")") diff --git a/src/projects.jl b/src/projects.jl index cd4a8cedf..ea64abe5b 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -1,19 +1,56 @@ +""" + struct ProjectDataset + +A dataset object returned by the functions that return project dataset links. + +Has the same fields as [`Dataset`](@ref) plus the following fields +that are specific to the project-dataset link: + +- `project_uuid::UUID`: identifies the project in the context of which the dataset was listed +- `is_writable :: Bool`: whether this dataset has been marked writable by the dataset owner +""" struct ProjectDataset + _dataset::Dataset project_uuid::UUIDs.UUID - dataset::Dataset is_writable::Bool end function Base.getproperty(pd::ProjectDataset, name::Symbol) + dataset = getfield(pd, :_dataset) if name in fieldnames(ProjectDataset) return getfield(pd, name) - elseif name in propertynames(Dataset) - return getproperty(getfield(pd, :dataset), name) + elseif name in propertynames(dataset) + return getproperty(dataset, name) else throw(ArgumentError("No property $name for ProjectDataset")) end end +function Base.show(io::IO, pd::ProjectDataset) + print( + io, + "JuliaHub.project_dataset((\"", + pd.owner, + "\", \"", + pd.name, + "\"); project=\"", + pd.project_uuid, + "\")", + ) +end +function Base.show(io::IO, ::MIME"text/plain", pd::ProjectDataset) + printstyled(io, "ProjectDataset:"; bold=true) + print(io, " ", pd.name, " (", pd.dtype, ")") + print(io, "\n owner: ", pd.owner) + print( + io, "\n project: ", pd.project_uuid, " ", + pd.is_writable ? 
"(writable)" : "(not writable)", + ) + print(io, "\n description: ", pd.description) + print(io, "\n versions: ", length(pd.versions)) + print(io, "\n size: ", pd.size, " bytes") + isempty(pd.tags) || print(io, "\n tags: ", join(pd.tags, ", ")) +end """ struct ProjectNotSetError <: JuliaHubException @@ -51,7 +88,7 @@ function _project_uuid(auth::Authentication, project::Union{ProjectReference, No end """ -JuliaHub.project_dataset(dataset::DatasetReference; [project::ProjectReference], [auth]) -> Dataset + JuliaHub.project_dataset(dataset::DatasetReference; [project::ProjectReference], [auth]) -> Dataset Looks up a dataset in the context of a project. """ @@ -59,7 +96,7 @@ function project_dataset end function project_dataset( dataset::Dataset; - project::Union{ProjectReference, Nothing}, + project::Union{ProjectReference, Nothing}=nothing, auth::Authentication=__auth__(), ) project_uuid = _project_uuid(auth, project) @@ -78,7 +115,7 @@ end function project_dataset( dsref::_DatasetRefTuple; - project::Union{ProjectReference, Nothing}, + project::Union{ProjectReference, Nothing}=nothing, auth::Authentication=__auth__(), ) username, dataset_name = dsref @@ -98,7 +135,7 @@ end function project_dataset( dataset_name::AbstractString; - project::Union{ProjectReference, Nothing}, + project::Union{ProjectReference, Nothing}=nothing, auth::Authentication=__auth__(), ) return project_dataset((auth.username, dataset_name); project, auth) @@ -142,10 +179,38 @@ function _project_datasets(auth::Authentication, project::UUIDs.UUID) JuliaHub._throw_invalidresponse(r; msg="Unable to fetch datasets.") end datasets, _ = JuliaHub._parse_response_json(r, Vector) - return map(_parse_dataset_list(datasets)) do dataset - @assert dataset._json["project"]["project_id"] == string(project) - ProjectDataset(project, dataset, dataset._json["project"]["is_writable"]) + n_erroneous_datasets = 0 + datasets = map(_parse_dataset_list(datasets)) do dataset + try + project_json = 
_get_json(dataset._json, "project", Dict) + project_json_uuid = _get_json(project_json, "project_id", String; msg=".project") + if project_json_uuid != string(project) + @debug "Invalid dataset in GET /datasets?project= response" dataset project_json_uuid project + n_erroneous_datasets += 1 + return nothing + end + is_writable = _get_json( + project_json, + "is_writable", + Bool; + msg="Unable to parse .project in /datasets?project response", + ) + return ProjectDataset(dataset, project, is_writable) + catch e + isa(e, JuliaHubError) || rethrow(e) + @debug "Invalid dataset in GET /datasets?project= response" dataset exception = ( + e, catch_backtrace() + ) + n_erroneous_datasets += 1 + return nothing + end + end + if n_erroneous_datasets > 0 + @warn "The JuliaHub GET /datasets?project= response contains erroneous project datasets. Omitting $(n_erroneous_datasets) entries." end + # We'll filter down to just ProjectDataset objects, and enforce + # type-stability of the array type here. + return ProjectDataset[pd for pd in datasets if isa(pd, ProjectDataset)] end """ @@ -174,7 +239,7 @@ Uploads a new version of a project-linked dataset. 
function upload_project_dataset end function upload_project_dataset( - dataset::Dataset, + ds::Dataset, local_path::AbstractString; progress::Bool=true, project::Union{ProjectReference, Nothing}=nothing, @@ -185,13 +250,11 @@ function upload_project_dataset( dtype = _dataset_dtype(local_path) # Actually attempt the upload - r = _open_dataset_version(auth, dataset.uuid, project_uuid) + r = _open_dataset_version(auth, ds.uuid, project_uuid) if r.status in (400, 403, 404) - throw( - InvalidRequestError( - "Can't upload :cry:" - ), - ) + # These response codes indicate a problem with the request + msg = "Unable to upload to dataset ($(ds.owner), $(ds.name)): $(r.json) (code: $(r.status))" + throw(InvalidRequestError(msg)) elseif r.status != 200 # Other response codes indicate a backend failure _throw_invalidresponse(r) @@ -207,12 +270,14 @@ function upload_project_dataset( # Finalize the upload try # _close_dataset_version will also throw on non-200 responses - _close_dataset_version(auth, dataset_name, upload_config; local_path) + r = _close_dataset_version(auth, ds.uuid, upload_config; local_path) + if r.status != 200 + end catch e throw(JuliaHubError("Finalizing upload failed", e, catch_backtrace())) end # If everything was successful, we'll return an updated DataSet object. 
- return dataset((username, dataset_name); auth) + return project_dataset(ds; project, auth) end function upload_project_dataset( @@ -234,3 +299,22 @@ function _open_dataset_version( JSON.json(body), ) end + +function _close_dataset_version( + auth::Authentication, dataset_uuid::UUID, upload_config; local_path +)::_RESTResponse + body = Dict( + "upload_id" => upload_config["upload_id"], + "action" => "close", + ) + if isnothing(local_path) + body["filename"] = local_path + end + return _restcall( + auth, + :POST, + ("datasets", string(dataset_uuid), "versions"), + JSON.json(body); + headers=["Content-Type" => "application/json"], + ) +end diff --git a/test/datasets.jl b/test/datasets.jl index 2506f2242..accbb5859 100644 --- a/test/datasets.jl +++ b/test/datasets.jl @@ -62,6 +62,14 @@ end @test ds_updated.owner == ds.owner @test ds_updated.dtype == ds.dtype @test ds_updated.description == ds.description + + @testset "propertynames()" begin + expected = filter( + s -> !startswith(string(s), "_"), + fieldnames(JuliaHub.Dataset), + ) + @test Set(propertynames(pd._dataset)) == Set(expected) + end end let ds = JuliaHub.dataset(("username", "example-dataset"); throw=false) @test ds isa JuliaHub.Dataset From bae352538f3accb49516723ac681c1db608b5807 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 10 Mar 2025 20:28:29 +1300 Subject: [PATCH 04/31] wip docs & cleanup --- docs/make.jl | 1 + docs/src/reference/exceptions.md | 2 + docs/src/reference/projects.md | 53 ++++++++++++++++++++++++ src/authentication.jl | 28 +++++++++++++ src/datasets.jl | 7 ++-- src/projects.jl | 69 ++++++++++++++++++++++++-------- src/utils.jl | 19 +++++++++ 7 files changed, 159 insertions(+), 20 deletions(-) create mode 100644 docs/src/reference/projects.md diff --git a/docs/make.jl b/docs/make.jl index bba0b043f..cdff5cc73 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -65,6 +65,7 @@ const PAGES_REFERENCE = [ "reference/job-submission.md", "reference/jobs.md", "reference/datasets.md", + 
"reference/projects.md", "reference/exceptions.md", ] Mocking.apply(mocking_patch) do diff --git a/docs/src/reference/exceptions.md b/docs/src/reference/exceptions.md index 7ec669289..6785c5f92 100644 --- a/docs/src/reference/exceptions.md +++ b/docs/src/reference/exceptions.md @@ -24,6 +24,8 @@ InvalidRequestError JuliaHubConnectionError JuliaHubError PermissionError +ProjectNotSetError +InvalidJuliaHubVersion ``` ## Index diff --git a/docs/src/reference/projects.md b/docs/src/reference/projects.md new file mode 100644 index 000000000..bcc0a0e8b --- /dev/null +++ b/docs/src/reference/projects.md @@ -0,0 +1,53 @@ +```@meta +CurrentModule=JuliaHub +``` + +# Projects + +These APIs allow you to interact with datasets that have been attached to projects. + +* [`project_datasets`](@ref) and [`project_dataset`](@ref) let you list and access datasets linked to a project +* [`upload_project_dataset`](@ref) allows uploading new versions of project-linked datasets + +## Automatic project authentication + +The [`Authentication`](@ref) object can be associated with a default project UUID, which will +then be used to for all _project_ operations, unless an explicit `project` gets passed to +override the default. + +Importantly, [`JuliaHub.authenticate`](@ref) will automatically pick up the the JuliaHub +project UUID from the `JULIAHUB_PROJECT_UUID` environment variable. This means in JuliaHub +cloud jobs and IDEs, it is not necessary to manually set the project, and JuliaHub.jl +will automatically. +However, you can opt-out of this behavior by explicitly passing a `project=nothing` to +[`JuliaHub.authenticate`](@ref). + +If you explicitly + +you can always pass `project= + +- `JULIAHUB_PROJECT_UUID` + +You can always verify that your operations are running in the context of the correct project +by checking the [`Authentication`](@ref) object, e.g. via [`current_authentication`](@ref): + +```wip-jldoctest +julia> JuliaHub.current_authentication() +... 
+``` + +## Reference + +```@docs +ProjectDataset +project_datasets +project_dataset +upload_project_dataset +ProjectReference +``` + +## Index + +```@index +Pages = ["project_datasets.md"] +``` diff --git a/src/authentication.jl b/src/authentication.jl index 7c61756b7..247c054ad 100644 --- a/src/authentication.jl +++ b/src/authentication.jl @@ -168,6 +168,7 @@ end server::AbstractString = Pkg.pkg_server(); force::Bool = false, maxcount::Integer = $(_DEFAULT_authenticate_maxcount), + [project::Union{AbstractString, UUIDs.UUID, Nothing}], [hook::Base.Callable] ) -> JuliaHub.Authentication JuliaHub.authenticate(server::AbstractString, token::Union{AbstractString, JuliaHub.Secret}) -> JuliaHub.Authentication @@ -201,6 +202,33 @@ The returned [`Authentication`](@ref) object is also cached globally (overwritin cached authentications), making it unnecessary to pass the returned object manually to other function calls. This is useful for interactive use, but should not be used in library code, as different authentication calls may clash. + +# Project Context + +An [`Authentication`](@ref) object can also specify the default JuliaHub project. +This can be set by passing the optional `project` argument, which works as follows: + +- If the `project` value is not passed, JuliaHub.jl will attempt to pick up the the project UUID + from the `JULIAHUB_PROJECT_UUID` environment variable, and will fall back to the non-project + context if that is not set. + +- If you pass an explicit UUID (either as a string or an `UUID` object), that will then be used + as the project. Note that a UUID passed as a string must be a syntactically correct UUID. + +- If you pass `nothing`, that make JuliaHub.jl ignore any values in the `JULIAHUB_PROJECT_UUID` + environment variable. + +!!! 
note "JULIAHUB_PROJECT_UUID" + + Generally, in JuliaHub jobs and cloud IDE environments that are launched in the context of a + project, the `JULIAHUB_PROJECT_UUID` is automatically set, and JuliaHub.jl will pick it up + automatically, unless explicitly disabled with `project=nothing`. + +!!! warning "Project access checks" + + When the [`Authentication`](@ref) object is constructed, access to or existence of the specified + project is not checked. However, if you attempt any project operations with such an + authentication object, they will fail and throw an error. """ function authenticate end diff --git a/src/datasets.jl b/src/datasets.jl index fa9e50a64..ebe923ff6 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -1,9 +1,10 @@ const _DOCS_nondynamic_datasets_object_warning = """ !!! warning "Non-dynamic dataset objects" - [`Dataset`](@ref) objects represents the dataset metadata when the Julia object was created - (e.g. with [`dataset`](@ref)), and are not automatically kept up to date. To refresh the dataset - metadata, you can pass the existing [`Dataset`](@ref) to [`JuliaHub.dataset`](@ref). + [`Dataset`](@ref) and [`ProjectDataset`](@ref) objects represents the dataset metadata when the + Julia object was created (e.g. with [`dataset`](@ref)), and are not automatically kept up to date. + To refresh the dataset metadata, you can pass the existing [`Dataset`](@ref) to [`JuliaHub.dataset`](@ref), + or [`ProjectDataset`](@ref) to [`project_dataset`](@ref). """ Base.@kwdef struct _DatasetStorage diff --git a/src/projects.jl b/src/projects.jl index ea64abe5b..7f9d0d2f5 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -1,10 +1,31 @@ +""" + struct ProjectNotSetError <: JuliaHubException + +Exception thrown when the authentication object is not set to a project, nor was +an explicit project UUID provided, but the operation requires a project to be +specified.
+""" +struct ProjectNotSetError <: JuliaHubException end + +function Base.showerror(io::IO, e::ProjectNotSetError) + print(io, "ProjectNotSetError: authentication object not associated with a project") +end + +function _assert_projects_enabled(auth::Authentication) + # The different project APIs are only present in JuliaHub 6.9 and later. + if auth._api_version < v"0.2.0" + msg = "Project APIs got added in JuliaHub 6.9 (expected API version >= 0.2.0, got $(auth._api_version), for $(auth.server))" + throw(InvalidJuliaHubVersion(msg)) + end +end + """ struct ProjectDataset A dataset object returned by the functions that return project dataset links. -Has the same fields as [`Dataset`](@ref) plus the following fields -that are specific to the project-dataset link: +Has the same fields as [`Dataset`](@ref) plus the following fields that are specific +to project-dataset links: - `project_uuid::UUID`: identifies the project in the context of which the dataset was listed - `is_writable :: Bool`: whether this dataset has been marked writable by the dataset owner @@ -53,20 +74,20 @@ function Base.show(io::IO, ::MIME"text/plain", pd::ProjectDataset) end """ - struct ProjectNotSetError <: JuliaHubException + const ProjectReference :: Type -Exception thrown when the authentication object is not set to a project, but the -operation is meant to take place in the context of a project. -""" -struct ProjectNotSetError <: JuliaHubException end -function Base.showerror(io::IO, e::ProjectNotSetError) - print(io, "ProjectNotSetError: authentication object not associated with a project") -end +Type constraint on the argument that specifies the project in projects-related +APIs (e.g. [`project_datasets`](@ref)). +Presently, you can specify the project by directly passing the project UUID. +The UUID should be either a string (`<: AbstractString`) or an `UUIDs.UUID` object.
+""" const ProjectReference = Union{AbstractString, UUIDs.UUID} # Parses the standard project::Union{ProjectReference, Nothing} we pass to # project_* function into a project UUID object (or throws the appropriate error). +# If project is nothing, we fall back to the project_id of the authentication object, +# if present. function _project_uuid(auth::Authentication, project::Union{ProjectReference, Nothing})::UUIDs.UUID if isnothing(project) if isnothing(auth.project_id) @@ -88,17 +109,22 @@ function _project_uuid(auth::Authentication, project::Union{ProjectReference, No end """ - JuliaHub.project_dataset(dataset::DatasetReference; [project::ProjectReference], [auth]) -> Dataset + JuliaHub.project_dataset(dataset::DatasetReference; [project::ProjectReference], [auth]) -> ProjectDataset + +Looks up the specified dataset among the datasets attached to the project, returning a +[`ProjectDataset`](@ref) object, or throwing an [`InvalidRequestError`](@ref) if the project +does not have the dataset attached. -Looks up a dataset in the context of a project. +$(_DOCS_nondynamic_datasets_object_warning) """ function project_dataset end function project_dataset( - dataset::Dataset; + dataset::Union{Dataset, ProjectDataset}; project::Union{ProjectReference, Nothing}=nothing, auth::Authentication=__auth__(), ) + _assert_projects_enabled(auth) project_uuid = _project_uuid(auth, project) datasets = _project_datasets(auth, project_uuid) for project_dataset in datasets @@ -142,9 +168,10 @@ function project_dataset( end """ - JuliaHub.project_datasets([project::Union{AbstractString, UUID}]; [auth::Authentication]) -> Vector{Dataset} + JuliaHub.project_datasets([project::ProjectReference]; [auth::Authentication]) -> Vector{Dataset} -Returns the list of datasets linked to the given project. +Returns the list of datasets attached to the project, as a list of [`ProjectDataset`](@ref) objects. +If the project is not explicitly specified, it uses the project of the authentication object. 
""" function project_datasets end @@ -239,7 +266,7 @@ Uploads a new version of a project-linked dataset. function upload_project_dataset end function upload_project_dataset( - ds::Dataset, + ds::Union{Dataset, ProjectDataset}, local_path::AbstractString; progress::Bool=true, project::Union{ProjectReference, Nothing}=nothing, @@ -281,8 +308,16 @@ function upload_project_dataset( end function upload_project_dataset( - ::Union{_DatasetRefTuple, AbstractString} + dataset::Union{_DatasetRefTuple, AbstractString}, + local_path::AbstractString; + progress::Bool=true, + project::Union{ProjectReference, Nothing}=nothing, + # Authentication + auth::Authentication=__auth__(), ) + project_uuid = _project_uuid(auth, project) + dataset = project_dataset(dataset; project=project_uuid, auth) + return upload_project_dataset(dataset, local_path; progress, project=project_uuid, auth) end # This calls the /datasets/{uuid}/versions?project={uuid} endpoint, diff --git a/src/utils.jl b/src/utils.jl index 3372ec7e9..18a50a09a 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -94,6 +94,25 @@ function Base.showerror(io::IO, e::PermissionError) isnothing(e.response) || print(io, '\n', e.response) end +""" + struct InvalidJuliaHubVersion <: JuliaHubException + +Thrown if the requested operation is not supported by the JuliaHub instance. +`.msg` contains a more detailed error message. + +!!! tip + + This generally means that the functionality you are attempting to use requires a + newer JuliaHub version. +""" +struct InvalidJuliaHubVersion <: JuliaHubException + msg::String +end + +function Base.showerror(io::IO, e::InvalidJuliaHubVersion) + print(io, "InvalidJuliaHubVersion: $(e.msg)") +end + _takebody!(r::HTTP.Response)::Vector{UInt8} = isa(r.body, IO) ? 
take!(r.body) : r.body _takebody!(r::HTTP.Response, ::Type{T}) where {T} = T(_takebody!(r)) From 121b122ff2643c08139b68fbfde2b7e6a2ec252d Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 10 Mar 2025 20:52:07 +1300 Subject: [PATCH 05/31] cleanup --- docs/src/reference/projects.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/src/reference/projects.md b/docs/src/reference/projects.md index bcc0a0e8b..1520b8e37 100644 --- a/docs/src/reference/projects.md +++ b/docs/src/reference/projects.md @@ -22,12 +22,6 @@ will automatically. However, you can opt-out of this behavior by explicitly passing a `project=nothing` to [`JuliaHub.authenticate`](@ref). -If you explicitly - -you can always pass `project= - -- `JULIAHUB_PROJECT_UUID` - You can always verify that your operations are running in the context of the correct project by checking the [`Authentication`](@ref) object, e.g. via [`current_authentication`](@ref): From 1f3c1aa8629f7ecef6f676b581bfeaeb1c44a560 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Tue, 11 Mar 2025 17:32:20 +1300 Subject: [PATCH 06/31] get rid of separate type --- docs/src/reference/datasets.md | 19 +++---- docs/src/reference/projects.md | 1 - src/datasets.jl | 83 ++++++++++++++++++++++++++--- src/projects.jl | 97 +++------------------------------- 4 files changed, 91 insertions(+), 109 deletions(-) diff --git a/docs/src/reference/datasets.md b/docs/src/reference/datasets.md index c2828ae7b..7dc55a414 100644 --- a/docs/src/reference/datasets.md +++ b/docs/src/reference/datasets.md @@ -41,15 +41,16 @@ The versions are indexed with a linear list of integers starting from `1`. 
## Reference ```@docs -JuliaHub.Dataset -JuliaHub.DatasetVersion -JuliaHub.datasets -JuliaHub.DatasetReference -JuliaHub.dataset -JuliaHub.download_dataset -JuliaHub.upload_dataset -JuliaHub.update_dataset -JuliaHub.delete_dataset +Dataset +DatasetVersion +datasets +DatasetReference +dataset +download_dataset +upload_dataset +update_dataset +delete_dataset +DatasetProjectLink ``` ## Index diff --git a/docs/src/reference/projects.md b/docs/src/reference/projects.md index 1520b8e37..275a41ba3 100644 --- a/docs/src/reference/projects.md +++ b/docs/src/reference/projects.md @@ -33,7 +33,6 @@ julia> JuliaHub.current_authentication() ## Reference ```@docs -ProjectDataset project_datasets project_dataset upload_project_dataset diff --git a/src/datasets.jl b/src/datasets.jl index ebe923ff6..b20b4bc59 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -1,10 +1,10 @@ const _DOCS_nondynamic_datasets_object_warning = """ !!! warning "Non-dynamic dataset objects" - [`Dataset`](@ref) and [`ProjectDataset`](@ref) objects represents the dataset metadata when the - Julia object was created (e.g. with [`dataset`](@ref)), and are not automatically kept up to date. - To refresh the dataset metadata, you can pass the existing [`Dataset`](@ref) to [`JuliaHub.dataset`](@ref), - or [`ProjectDataset`](@ref) to [`project_dataset`](@ref). + [`Dataset`](@ref) objects represents the dataset metadata when the Julia object was created + (e.g. with [`dataset`](@ref)), and are not automatically kept up to date. + To refresh the dataset metadata, you can pass an existing [`Dataset`](@ref) object + to [`JuliaHub.dataset`](@ref) or [`project_dataset`](@ref). """ Base.@kwdef struct _DatasetStorage @@ -71,6 +71,25 @@ function Base.show(io::IO, ::MIME"text/plain", dsv::DatasetVersion) print(io, "\n size: ", dsv.size, " bytes") end +""" + struct DatasetProjectLink + +Holds the project-dataset link metadata for datasets that were accessed via a project +(e.g. when using [`project_datasets`](@ref)). 
+ +- `.uuid :: UUID`: the UUID of the project +- `.is_writable :: Bool`: whether the user has write access to the dataset via + this project + +See also: [`project_dataset`](@ref), [`project_datasets`](@ref), [`upload_project_dataset`](@ref). + +$(_DOCS_no_constructors_admonition) +""" +struct DatasetProjectLink + uuid::UUIDs.UUID + is_writable::Bool +end + """ struct Dataset @@ -87,6 +106,13 @@ public API: - Fields to access user-provided dataset metadata: - `description :: String`: dataset description - `tags :: Vector{String}`: a list of tags +- If the dataset was accessed via a project (e.g. via [`project_datasets`](@ref)), `.project` will + contain project metadata (see also: [`DatasetProjectLink`](@ref)). Otherwise this field is `nothing`. + - `project.uuid`: the UUID of the project + - `project.is_writable`: whether the user has write access to the dataset via + this project + Note that two `Dataset` objects are considered to be equal (i.e. `==`) regardless of the `.project` + value -- it references the same dataset regardless of the project it was accessed in. !!!
note "Canonical fully qualified dataset name" @@ -108,6 +134,7 @@ Base.@kwdef struct Dataset # User-set metadata description::String tags::Vector{String} + project::Union{DatasetProjectLink, Nothing} # Additional metadata, but not part of public API _last_modified::Union{Nothing, TimeZones.ZonedDateTime} _downloadURL::String @@ -117,11 +144,30 @@ Base.@kwdef struct Dataset _json::Dict end -function Dataset(d::Dict) +function Dataset(d::Dict; expected_project::Union{UUIDs.UUID, Nothing}=nothing) owner = d["owner"]["username"] name = d["name"] versions_json = _get_json_or(d, "versions", Vector, []) versions = sort([DatasetVersion(json; owner, name) for json in versions_json]; by=dsv -> dsv.id) + project = if !isnothing(expected_project) + project_json = _get_json(d, "project", Dict) + project_json_uuid = UUIDs.UUID( + _get_json(project_json, "project_id", String; msg=".project") + ) + if project_json_uuid != expected_project + msg = "Project UUID mismatch in dataset response: $(project_json_uuid), requested $(expected_project)" + throw(JuliaHubError(msg)) + end + is_writable = _get_json( + project_json, + "is_writable", + Bool; + msg="Unable to parse .project in /datasets?project response", + ) + DatasetProjectLink(project_json_uuid, is_writable) + else + nothing + end Dataset(; uuid=UUIDs.UUID(d["id"]), name, owner, versions, @@ -129,6 +175,7 @@ function Dataset(d::Dict) description=d["description"], size=d["size"], tags=d["tags"], + project=project, _downloadURL=d["downloadURL"], _last_modified=_nothing_or(d["lastModified"]) do last_modified datetime_utc = Dates.DateTime( @@ -151,7 +198,12 @@ function Base.propertynames(::Dataset) end function Base.show(io::IO, d::Dataset) - print(io, "JuliaHub.dataset((\"", d.owner, "\", \"", d.name, "\"))") + dsref = string("(\"", d.owner, "\", \"", d.name, "\")") + if isnothing(d.project) + print(io, "JuliaHub.dataset(", dsref, ")") + else + print(io, "JuliaHub.project_dataset(", dsref, "; project=", d.project.uuid, ")") + end end
function Base.show(io::IO, ::MIME"text/plain", d::Dataset) @@ -162,6 +214,13 @@ function Base.show(io::IO, ::MIME"text/plain", d::Dataset) print(io, "\n versions: ", length(d.versions)) print(io, "\n size: ", d.size, " bytes") isempty(d.tags) || print(io, "\n tags: ", join(d.tags, ", ")) + if !isnothing(d.project) + print( + io, + "\n project: ", d.project.uuid, " ", + d.project.is_writable ? "(writable)" : "(not writable)", + ) + end end function Base.:(==)(d1::Dataset, d2::Dataset) @@ -331,7 +390,9 @@ function datasets( end function _parse_dataset_list( - datasets::Vector; username::Union{AbstractString, Nothing}=nothing + datasets::Vector; + username::Union{AbstractString, Nothing}=nothing, + expected_project::Union{UUIDs.UUID, Nothing}=nothing, )::Vector{Dataset} # It might happen that some of the elements of the `datasets` array can not be parsed for some reason, # and the Dataset() constructor will throw. Rather than having `datasets` throw an error (as we would @@ -348,8 +409,14 @@ function _parse_dataset_list( if !isnothing(username) && (dataset["owner"]["username"] != username) return nothing end - return Dataset(dataset) + return Dataset(dataset; expected_project) catch e + # If we fail to parse the server response for a dataset, we should always get a JuliaHubError. + # Other errors types might indicate e.g. code errors, so we don't want to swallow those + # here, and instead throw immediately. + if !isa(e, JuliaHubError) + rethrow() + end @debug "Invalid dataset in GET /datasets response" dataset exception = ( e, catch_backtrace() ) diff --git a/src/projects.jl b/src/projects.jl index 7f9d0d2f5..d983ef39c 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -19,60 +19,6 @@ function _assert_projects_enabled(auth::Authentication) end end -""" - struct ProjectDataset - -A dataset object returned by the functions that return project dataset links. 
- -Has the same fields as [`Dataset`](@ref) plus the following fields that are specific -to project-dataset links: - -- `project_uuid::UUID`: identifies the project in the context of which the dataset was listed -- `is_writable :: Bool`: whether this dataset has been marked writable by the dataset owner -""" -struct ProjectDataset - _dataset::Dataset - project_uuid::UUIDs.UUID - is_writable::Bool -end - -function Base.getproperty(pd::ProjectDataset, name::Symbol) - dataset = getfield(pd, :_dataset) - if name in fieldnames(ProjectDataset) - return getfield(pd, name) - elseif name in propertynames(dataset) - return getproperty(dataset, name) - else - throw(ArgumentError("No property $name for ProjectDataset")) - end -end - -function Base.show(io::IO, pd::ProjectDataset) - print( - io, - "JuliaHub.project_dataset((\"", - pd.owner, - "\", \"", - pd.name, - "\"); project=\"", - pd.project_uuid, - "\")", - ) -end -function Base.show(io::IO, ::MIME"text/plain", pd::ProjectDataset) - printstyled(io, "ProjectDataset:"; bold=true) - print(io, " ", pd.name, " (", pd.dtype, ")") - print(io, "\n owner: ", pd.owner) - print( - io, "\n project: ", pd.project_uuid, " ", - pd.is_writable ? 
"(writable)" : "(not writable)", - ) - print(io, "\n description: ", pd.description) - print(io, "\n versions: ", length(pd.versions)) - print(io, "\n size: ", pd.size, " bytes") - isempty(pd.tags) || print(io, "\n tags: ", join(pd.tags, ", ")) -end - """ const ProjectReference :: Type @@ -109,10 +55,10 @@ function _project_uuid(auth::Authentication, project::Union{ProjectReference, No end """ - JuliaHub.project_dataset(dataset::DatasetReference; [project::ProjectReference], [auth]) -> ProjectDataset + JuliaHub.project_dataset(dataset::DatasetReference; [project::ProjectReference], [auth]) -> Dataset Looks up the specified dataset among the datasets attached to the project, returning a -[`ProjectDataset`](@ref) object, or throwing an [`InvalidRequestError`](@ref) if the project +[`Dataset`](@ref) object, or throwing an [`InvalidRequestError`](@ref) if the project does not have the dataset attached. $(_DOCS_nondynamic_datasets_object_warning) @@ -120,7 +66,7 @@ $(_DOCS_nondynamic_datasets_object_warning) function project_dataset end function project_dataset( - dataset::Union{Dataset, ProjectDataset}; + dataset::Dataset; project::Union{ProjectReference, Nothing}=nothing, auth::Authentication=__auth__(), ) @@ -170,7 +116,7 @@ end """ JuliaHub.project_datasets([project::ProjectReference]; [auth::Authentication]) -> Vector{Dataset} -Returns the list of datasets attached to the project, as a list of [`ProjectDataset`](@ref) objects. +Returns the list of datasets attached to the project, as a list of [`Dataset`](@ref) objects. If the project is not explicitly specified, it uses the project of the authentication object. 
""" function project_datasets end @@ -206,38 +152,7 @@ function _project_datasets(auth::Authentication, project::UUIDs.UUID) JuliaHub._throw_invalidresponse(r; msg="Unable to fetch datasets.") end datasets, _ = JuliaHub._parse_response_json(r, Vector) - n_erroneous_datasets = 0 - datasets = map(_parse_dataset_list(datasets)) do dataset - try - project_json = _get_json(dataset._json, "project", Dict) - project_json_uuid = _get_json(project_json, "project_id", String; msg=".project") - if project_json_uuid != string(project) - @debug "Invalid dataset in GET /datasets?project= response" dataset project_json_uuid project - n_erroneous_datasets += 1 - return nothing - end - is_writable = _get_json( - project_json, - "is_writable", - Bool; - msg="Unable to parse .project in /datasets?project response", - ) - return ProjectDataset(dataset, project, is_writable) - catch e - isa(e, JuliaHubError) || rethrow(e) - @debug "Invalid dataset in GET /datasets?project= response" dataset exception = ( - e, catch_backtrace() - ) - n_erroneous_datasets += 1 - return nothing - end - end - if n_erroneous_datasets > 0 - @warn "The JuliaHub GET /datasets?project= response contains erroneous project datasets. Omitting $(n_erroneous_datasets) entries." - end - # We'll filter down to just ProjectDataset objects, and enforce - # type-stability of the array type here. - return ProjectDataset[pd for pd in datasets if isa(pd, ProjectDataset)] + return _parse_dataset_list(datasets; expected_project=project) end """ @@ -266,7 +181,7 @@ Uploads a new version of a project-linked dataset. 
function upload_project_dataset end function upload_project_dataset( - ds::Union{Dataset, ProjectDataset}, + ds::Dataset, local_path::AbstractString; progress::Bool=true, project::Union{ProjectReference, Nothing}=nothing, From f67af70a25a8f42a7c34084518c6d8b915be6a59 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Wed, 12 Mar 2025 18:12:14 +1300 Subject: [PATCH 07/31] add basic Dataset constructor tests --- test/datasets.jl | 57 +++++++++++++++++++- test/mocking.jl | 137 ++++++++++++++++++++++------------------------- 2 files changed, 120 insertions(+), 74 deletions(-) diff --git a/test/datasets.jl b/test/datasets.jl index 0ea3ecf75..2431baee6 100644 --- a/test/datasets.jl +++ b/test/datasets.jl @@ -21,6 +21,61 @@ ) end +# These tests mainly exercise the Dataset() constructor, to ensure that it throws the +# correct error objects. +@testset "Dataset" begin + d0 = () -> _dataset_json("test/test"; version_sizes=[42]) + let ds = JuliaHub.Dataset(d0()) + @test ds isa JuliaHub.Dataset + @test ds.uuid == Base.UUID("3c4441bd-04bd-59f2-5426-70de923e67c2") + @test ds.owner == "test" + @test ds.name == "test" + @test ds.description == "An example dataset" + @test ds.tags == ["tag1", "tag2"] + @test ds.dtype == "Blob" + @test ds.size == 42 + @test length(ds.versions) == 1 + @test ds.versions[1].id == 1 + @test ds.versions[1].size == 42 + end + + # We don't verify dtype values (this list might expand in the future) + let d = Dict(d0()..., "type" => "Unknown Dtype") + ds = JuliaHub.Dataset(d) + @test ds.dtype == "Unknown Dtype" + end + + # If there are critical fields missing, it will throw + @testset "required property: $(pname)" for pname in ( + "id", "owner", "name", "type", "description", "tags", + "downloadURL", "lastModified", "credentials_url", "storage", + ) + d = Dict(d0()...) + delete!(d, pname) + # TODO: should not be a KeyError though.. 
+ @test_throws KeyError JuliaHub.Dataset(d) + end + # We also need to be able to parse the UUID into UUIDs.UUID + let d = Dict(d0()..., "id" => "1234") + # TODO: should not be a ArgumentError though.. + @test_throws ArgumentError JuliaHub.Dataset(d) + end + + # Missing versions list is okay though. We assume that there are no + # versions then. + let d = d0() + delete!(d, "versions") + ds = JuliaHub.Dataset(d) + @test length(ds.versions) == 0 + end + # But a bad type is not okay + let d = Dict(d0()..., "versions" => 0) + @test_throws JuliaHub.JuliaHubError( + "Invalid JSON returned by the server: `versions` of type `Int64`, expected `<: Vector`." + ) JuliaHub.Dataset(d) + end +end + @testset "JuliaHub.dataset(s)" begin empty!(MOCK_JULIAHUB_STATE) Mocking.apply(mocking_patch) do @@ -119,7 +174,7 @@ end @test isempty(ds.versions) end - MOCK_JULIAHUB_STATE[:datasets_erroneous] = ["erroneous_dataset"] + MOCK_JULIAHUB_STATE[:datasets_erroneous] = ["bad-user/erroneous_dataset"] err_ds_warn = ( :warn, "The JuliaHub GET /datasets response contains erroneous datasets. Omitting 1 entries.", diff --git a/test/mocking.jl b/test/mocking.jl index 7a9f2b770..1fb00df4d 100644 --- a/test/mocking.jl +++ b/test/mocking.jl @@ -332,83 +332,25 @@ function _restcall_mocked(method, url, headers, payload; query) Dict("message" => "", "success" => true) |> jsonresponse(200) end elseif (method == :GET) && endswith(url, "datasets") - dataset_params = get(MOCK_JULIAHUB_STATE, :dataset_params, Dict()) - dataset_version_sizes = get(MOCK_JULIAHUB_STATE, :dataset_version_sizes, nothing) - zerotime = TimeZones.ZonedDateTime("2022-10-12T05:39:42.906+00:00") - versions_json = - dataset -> begin - version_sizes = something( - dataset_version_sizes, - (dataset == "example-dataset") ? 
[57, 331] : [57], - ) - Dict( - "version" => string("v", length(version_sizes)), - "versions" => map(enumerate(version_sizes)) do (i, sz) - Dict( - "version" => i, - "blobstore_path" => string("u", 2), - "size" => sz, - "date" => string(zerotime + Dates.Day(i) + Dates.Millisecond(sz)), - ) - end, - "size" => isempty(version_sizes) ? 0 : sum(version_sizes), - ) - end - #! format: off - shared = Dict( - "groups" => Any[], - "storage" => Dict( - "bucket_region" => "us-east-1", - "bucket" => "datasets-bucket", - "prefix" => "datasets", - "vendor" => "aws", - ), - "description" => get(dataset_params, "description", "An example dataset"), - "tags" => get(dataset_params, "tags", ["tag1", "tag2"]), - "license" => ( - "name" => "MIT License", - "spdx_id" => "MIT", - "text" => nothing, - "url" => "https://opensource.org/licenses/MIT", - ), - "lastModified" => "2022-10-12T05:39:42.906", - "downloadURL" => "", - "credentials_url" => "...", - ) - #! format: on - datasets = [] - for dataset_full_id in existing_datasets - username, dataset = split(dataset_full_id, '/'; limit=2) - push!(datasets, - Dict( - "id" => string(uuidhash(dataset_full_id)), - "name" => dataset, - "owner" => Dict( - "username" => username, - "type" => "User", - ), - "type" => occursin("blobtree", dataset) ? "BlobTree" : "Blob", - "visibility" => occursin("public", dataset) ? "public" : "private", - versions_json(dataset)..., - shared..., + datasets = Dict[] + for dataset_name in existing_datasets + d = _dataset_json( + dataset_name; + params=get(MOCK_JULIAHUB_STATE, :dataset_params, Dict()), + version_sizes=something( + get(MOCK_JULIAHUB_STATE, :dataset_version_sizes, nothing), + endswith(dataset_name, "/example-dataset") ? 
[57, 331] : [57], ), ) + push!(datasets, d) end - for dataset in get(MOCK_JULIAHUB_STATE, :datasets_erroneous, String[]) - push!(datasets, - Dict( - "id" => string(uuidhash(dataset)), - "name" => dataset, - "owner" => Dict( - "username" => nothing, - "type" => "User", - ), - "type" => occursin("blobtree", dataset) ? "BlobTree" : "Blob", - "visibility" => occursin("public", dataset) ? "public" : "private", - versions_json(dataset)..., - shared..., - ), + for dataset_name in get(MOCK_JULIAHUB_STATE, :datasets_erroneous, String[]) + d = _dataset_json( + dataset_name; + version_sizes=(dataset_name == "example-dataset") ? [57, 331] : [57], ) + d["owner"]["username"] = nothing + push!(datasets, d) end datasets |> jsonresponse(200) elseif (method == :DELETE) && endswith(url, DATASET_REGEX) @@ -776,3 +718,52 @@ function _http_request_mocked( ] HTTP.Response(200, headers, b"success") end + +function _dataset_json( + dataset_name::AbstractString; + params=Dict(), + version_sizes=[], +) + zerotime = TimeZones.ZonedDateTime("2022-10-12T05:39:42.906+00:00") + username, dataset = split(dataset_name, '/'; limit=2) + return Dict{String, Any}( + "id" => string(uuidhash(dataset_name)), + "name" => dataset, + "owner" => Dict{String, Any}( + "username" => username, + "type" => "User", + ), + "type" => occursin("blobtree", dataset) ? "BlobTree" : "Blob", + "visibility" => occursin("public", dataset) ? "public" : "private", + # versions + "version" => string("v", length(version_sizes)), + "versions" => map(enumerate(version_sizes)) do (i, sz) + Dict{String, Any}( + "version" => i, + "blobstore_path" => string("u", 2), + "size" => sz, + "date" => string(zerotime + Dates.Day(i) + Dates.Millisecond(sz)), + ) + end, + "size" => isempty(version_sizes) ? 
0 : sum(version_sizes), + # shared + "groups" => Any[], + "storage" => Dict{String, Any}( + "bucket_region" => "us-east-1", + "bucket" => "datasets-bucket", + "prefix" => "datasets", + "vendor" => "aws", + ), + "description" => get(params, "description", "An example dataset"), + "tags" => get(params, "tags", ["tag1", "tag2"]), + "license" => ( + "name" => "MIT License", + "spdx_id" => "MIT", + "text" => nothing, + "url" => "https://opensource.org/licenses/MIT", + ), + "lastModified" => "2022-10-12T05:39:42.906", + "downloadURL" => "", + "credentials_url" => "...", + ) +end From 9d98a22682445ac4203f838e06379ea5f60f84d6 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Wed, 12 Mar 2025 19:12:07 +1300 Subject: [PATCH 08/31] make sure Dataset only throws JuliaHubError --- src/datasets.jl | 41 +++++++++++++++++++++++++---------------- src/utils.jl | 28 ++++++++++++++++++++++++++++ test/datasets.jl | 31 +++++++++++++++++++++++++------ test/mocking.jl | 2 +- test/utils.jl | 22 ++++++++++++++++++++++ 5 files changed, 101 insertions(+), 23 deletions(-) diff --git a/src/datasets.jl b/src/datasets.jl index 4d81eada5..c7146db99 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -131,30 +131,39 @@ Base.@kwdef struct Dataset end function Dataset(d::Dict) - owner = d["owner"]["username"] - name = d["name"] + owner = _get_json( + _get_json(d, "owner", Dict), + "username", String, + ) + name = _get_json(d, "name", AbstractString) versions_json = _get_json_or(d, "versions", Vector, []) - versions = sort([DatasetVersion(json; owner, name) for json in versions_json]; by=dsv -> dsv.id) + versions = sort( + [DatasetVersion(json; owner, name) for json in versions_json]; + by=dsv -> dsv.id, + ) + _storage = let storage_json = _get_json(d, "storage", Dict) + _DatasetStorage(; + credentials_url=_get_json(d, "credentials_url", AbstractString), + region=_get_json(storage_json, "bucket_region", AbstractString), + bucket=_get_json(storage_json, "bucket", AbstractString), + 
prefix=_get_json(storage_json, "prefix", AbstractString), + ) + end Dataset(; - uuid=UUIDs.UUID(d["id"]), + uuid=_get_json_convert(d, "id", UUIDs.UUID), name, owner, versions, - dtype=d["type"], - description=d["description"], - size=d["size"], - tags=d["tags"], - _downloadURL=d["downloadURL"], - _last_modified=_nothing_or(d["lastModified"]) do last_modified + dtype=_get_json(d, "type", AbstractString), + description=_get_json(d, "description", AbstractString), + size=_get_json(d, "size", Integer), + tags=_get_json(d, "tags", Vector{<:AbstractString}), + _downloadURL=_get_json(d, "downloadURL", AbstractString), + _last_modified=_nothing_or(_get_json(d, "lastModified", AbstractString)) do last_modified datetime_utc = Dates.DateTime( last_modified, Dates.dateformat"YYYY-mm-ddTHH:MM:SS.ss" ) _utc2localtz(datetime_utc) end, - _storage=_DatasetStorage(; - credentials_url=d["credentials_url"], - region=d["storage"]["bucket_region"], - bucket=d["storage"]["bucket"], - prefix=d["storage"]["prefix"], - ), + _storage, _json=d, ) end diff --git a/src/utils.jl b/src/utils.jl index 3372ec7e9..e08561bdc 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -176,6 +176,34 @@ function _get_json_or( haskey(json, key) ? _get_json(json, key, T; msg) : default end +# _get_json_convert is a _get_json-type helper that also does some sort of type conversion +# parsing etc. The general signature is the following: +# +# function _get_json_convert( +# json::Dict, key::AbstractString, ::Type{T}; +# msg::Union{AbstractString, Nothing}=nothing +# )::T +# +# Although in practice we implement for each type separately, since the parsing/conversion logic +# can vary dramatically. +# +# A key point, though, is that it will throw a JuliaHubError if the server response is somehow +# invalid and we can't parse/convert it properly. 
+function _get_json_convert( + json::Dict, key::AbstractString, ::Type{UUIDs.UUID}; msg::Union{AbstractString, Nothing}=nothing +)::UUIDs.UUID + uuid_str = _get_json(json, key, String; msg) + uuid = tryparse(UUIDs.UUID, uuid_str) + if isnothing(uuid) + errormsg = """ + Invalid JSON returned by the server: `$key` not a valid UUID string. + Server returned '$(uuid_str)'.""" + isnothing(msg) || (errormsg = string(msg, '\n', errormsg)) + throw(JuliaHubError(errormsg)) + end + return uuid +end + """ mutable struct Secret diff --git a/test/datasets.jl b/test/datasets.jl index 2431baee6..301324cc0 100644 --- a/test/datasets.jl +++ b/test/datasets.jl @@ -50,15 +50,34 @@ end "id", "owner", "name", "type", "description", "tags", "downloadURL", "lastModified", "credentials_url", "storage", ) - d = Dict(d0()...) - delete!(d, pname) - # TODO: should not be a KeyError though.. - @test_throws KeyError JuliaHub.Dataset(d) + let d = Dict(d0()...) + delete!(d, pname) + e = @test_throws JuliaHub.JuliaHubError JuliaHub.Dataset(d) + @test startswith( + e.value.msg, + "Invalid JSON returned by the server: `$pname` missing in the response.", + ) + end + # Replace the value with a value that's of incorrect type + let d = Dict(d0()..., pname => missing) + e = @test_throws JuliaHub.JuliaHubError JuliaHub.Dataset(d) + @test startswith( + e.value.msg, + "Invalid JSON returned by the server: `$(pname)` of type `Missing`, expected", + ) + end end # We also need to be able to parse the UUID into UUIDs.UUID let d = Dict(d0()..., "id" => "1234") - # TODO: should not be a ArgumentError though.. - @test_throws ArgumentError JuliaHub.Dataset(d) + @test_throws JuliaHub.JuliaHubError( + "Invalid JSON returned by the server: `id` not a valid UUID string.\nServer returned '1234'." 
+ ) JuliaHub.Dataset(d) + end + # And correctly throw for invalid owner.username + let d = Dict(d0()..., "owner" => nothing) + @test_throws JuliaHub.JuliaHubError( + "Invalid JSON returned by the server: `owner` of type `Nothing`, expected `<: Dict`." + ) JuliaHub.Dataset(d) end # Missing versions list is okay though. We assume that there are no diff --git a/test/mocking.jl b/test/mocking.jl index 1fb00df4d..7353cd9ff 100644 --- a/test/mocking.jl +++ b/test/mocking.jl @@ -725,7 +725,7 @@ function _dataset_json( version_sizes=[], ) zerotime = TimeZones.ZonedDateTime("2022-10-12T05:39:42.906+00:00") - username, dataset = split(dataset_name, '/'; limit=2) + username, dataset = string.(split(dataset_name, '/'; limit=2)) return Dict{String, Any}( "id" => string(uuidhash(dataset_name)), "name" => dataset, diff --git a/test/utils.jl b/test/utils.jl index ad08d96ae..8d004e69b 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -77,3 +77,25 @@ end @test_throws JuliaHub.JuliaHubError JuliaHub._parse_tz("") @test_throws JuliaHub.JuliaHubError JuliaHub._parse_tz("bad-string") end + +@testset "_get_json_convert" begin + @test JuliaHub._get_json_convert( + Dict("id" => "123e4567-e89b-12d3-a456-426614174000"), "id", UUIDs.UUID + ) == UUIDs.UUID("123e4567-e89b-12d3-a456-426614174000") + # Error cases: + @test_throws JuliaHub.JuliaHubError( + "Invalid JSON returned by the server: `id` not a valid UUID string.\nServer returned '123'." + ) JuliaHub._get_json_convert( + Dict("id" => "123"), "id", UUIDs.UUID + ) + @test_throws JuliaHub.JuliaHubError( + "Invalid JSON returned by the server: `id` of type `Int64`, expected `<: String`." 
+ ) JuliaHub._get_json_convert( + Dict("id" => 123), "id", UUIDs.UUID + ) + @test_throws JuliaHub.JuliaHubError( + "Invalid JSON returned by the server: `id` missing in the response.\nKeys present: _id_missing\njson: Dict{String, String} with 1 entry:\n \"_id_missing\" => \"123e4567-e89b-12d3-a456-426614174000\"" + ) JuliaHub._get_json_convert( + Dict("_id_missing" => "123e4567-e89b-12d3-a456-426614174000"), "id", UUIDs.UUID + ) +end From 25c9f8fdf020cc4d18ad188ac6f59a966c8cd9c1 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Wed, 12 Mar 2025 19:19:43 +1300 Subject: [PATCH 09/31] in mocking, .tags is sometimes Any[] --- src/datasets.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets.jl b/src/datasets.jl index c7146db99..28446c3ed 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -155,7 +155,7 @@ function Dataset(d::Dict) dtype=_get_json(d, "type", AbstractString), description=_get_json(d, "description", AbstractString), size=_get_json(d, "size", Integer), - tags=_get_json(d, "tags", Vector{<:AbstractString}), + tags=_get_json(d, "tags", Vector), _downloadURL=_get_json(d, "downloadURL", AbstractString), _last_modified=_nothing_or(_get_json(d, "lastModified", AbstractString)) do last_modified datetime_utc = Dates.DateTime( From 512d23002a2d4c95d222df3c8065ece627eee751 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Wed, 12 Mar 2025 19:20:24 +1300 Subject: [PATCH 10/31] only silently capture JuliaHubErrors in datasets() --- src/datasets.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/datasets.jl b/src/datasets.jl index 28446c3ed..e5c353c2e 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -360,6 +360,10 @@ function datasets( end return Dataset(dataset) catch e + # If Dataset() fails due to some unexpected value in one of the dataset JSON objects that + # JuliaHub.jl can not handle, it should only throw a JuliaHubError. 
So we rethrow on other + # error types, as filtering all of them out could potentially hide JuliaHub.jl bugs. + isa(e, JuliaHubError) || rethrow() @debug "Invalid dataset in GET /datasets response" dataset exception = ( e, catch_backtrace() ) From 9a81327705dae09653d2cd8a5d03888c89f74d6d Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Wed, 12 Mar 2025 19:29:23 +1300 Subject: [PATCH 11/31] add a return --- src/datasets.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets.jl b/src/datasets.jl index e5c353c2e..cdf374358 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -149,7 +149,7 @@ function Dataset(d::Dict) prefix=_get_json(storage_json, "prefix", AbstractString), ) end - Dataset(; + return Dataset(; uuid=_get_json_convert(d, "id", UUIDs.UUID), name, owner, versions, dtype=_get_json(d, "type", AbstractString), From 344be60725a25f69afdcd2e59056cfb359d9716a Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Wed, 12 Mar 2025 19:57:59 +1300 Subject: [PATCH 12/31] don't rely on string rep of a parametric type --- test/datasets.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/datasets.jl b/test/datasets.jl index 301324cc0..f7a1672ef 100644 --- a/test/datasets.jl +++ b/test/datasets.jl @@ -89,9 +89,10 @@ end end # But a bad type is not okay let d = Dict(d0()..., "versions" => 0) - @test_throws JuliaHub.JuliaHubError( - "Invalid JSON returned by the server: `versions` of type `Int64`, expected `<: Vector`." 
- ) JuliaHub.Dataset(d) + e = @test_throws JuliaHub.JuliaHubError JuliaHub.Dataset(d) + @test startswith( + e.value.msg, "Invalid JSON returned by the server: `versions` of type `Int64`" + ) end end From 1f906f8c090e1ce54f61e0e048ba07b034e849f6 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Thu, 13 Mar 2025 15:57:28 +1300 Subject: [PATCH 13/31] bad merge --- src/datasets.jl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/datasets.jl b/src/datasets.jl index 7362fd4a2..9efd9cb3f 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -434,19 +434,10 @@ function _parse_dataset_list( end return Dataset(dataset; expected_project) catch e -<<<<<<< HEAD - # If we fail to parse the server response for a dataset, we should always get a JuliaHubError. - # Other errors types might indicate e.g. code errors, so we don't want to swallow those - # here, and instead throw immediately. - if !isa(e, JuliaHubError) - rethrow() - end -======= # If Dataset() fails due to some unexpected value in one of the dataset JSON objects that # JuliaHub.jl can not handle, it should only throw a JuliaHubError. So we rethrow on other # error types, as filtering all of them out could potentially hide JuliaHub.jl bugs. 
isa(e, JuliaHubError) || rethrow() ->>>>>>> mp/dataset-constructor-tests @debug "Invalid dataset in GET /datasets response" dataset exception = ( e, catch_backtrace() ) From 8811daadbe3ab5792658d7f35b71e20b8e049fc6 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Thu, 13 Mar 2025 23:40:03 +1300 Subject: [PATCH 14/31] fix --- src/datasets.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets.jl b/src/datasets.jl index 9efd9cb3f..7bee8fed1 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -158,7 +158,7 @@ Base.@kwdef struct Dataset _json::Dict end -function Dataset(d::Dict) +function Dataset(d::Dict; expected_project::Union{UUID, Nothing}=nothing) owner = _get_json( _get_json(d, "owner", Dict), "username", String, From 0590b89357d138a1890e0157caeb0265b8b23221 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Tue, 18 Mar 2025 18:51:24 +1300 Subject: [PATCH 15/31] fix current tests --- src/datasets.jl | 8 +++++--- src/projects.jl | 2 +- test/datasets.jl | 2 +- test/runtests.jl | 4 +++- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/datasets.jl b/src/datasets.jl index 7bee8fed1..392e53952 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -217,7 +217,7 @@ function Dataset(d::Dict; expected_project::Union{UUID, Nothing}=nothing) end function Base.propertynames(::Dataset) - return (:owner, :name, :uuid, :dtype, :size, :versions, :description, :tags) + return (:owner, :name, :uuid, :dtype, :size, :versions, :description, :tags, :project) end function Base.show(io::IO, d::Dataset) @@ -695,7 +695,7 @@ function upload_dataset end # Any other 404 or other non-200 response indicates a backend failure _throw_invalidresponse(r) end - upload_config = _check_dataset_upload_config(r, dtype) + upload_config = _check_dataset_upload_config(r, dtype; newly_created_dataset) # Upload the actual data try _upload_dataset(upload_config, local_path; progress) @@ -720,7 +720,9 @@ function upload_dataset end return 
dataset((username, dataset_name); auth) end -function _check_dataset_upload_config(r::_RESTResponse, expected_dtype::AbstractString) +function _check_dataset_upload_config( + r::_RESTResponse, expected_dtype::AbstractString; newly_created_dataset::Bool +) upload_config, _ = _parse_response_json(r, Dict) # Verify that the dtype of the remote dataset is what we expect it to be. if upload_config["dataset_type"] != expected_dtype diff --git a/src/projects.jl b/src/projects.jl index d983ef39c..3131f76b8 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -202,7 +202,7 @@ function upload_project_dataset( _throw_invalidresponse(r) end # ... - upload_config = _check_dataset_upload_config(r, dtype) + upload_config = _check_dataset_upload_config(r, dtype; newly_created_dataset=false) # Upload the actual data try _upload_dataset(upload_config, local_path; progress) diff --git a/test/datasets.jl b/test/datasets.jl index 4b610b2f3..4e3ff821e 100644 --- a/test/datasets.jl +++ b/test/datasets.jl @@ -157,7 +157,7 @@ end s -> !startswith(string(s), "_"), fieldnames(JuliaHub.Dataset), ) - @test Set(propertynames(pd._dataset)) == Set(expected) + @test Set(propertynames(ds)) == Set(expected) end end let ds = JuliaHub.dataset(("username", "example-dataset"); throw=false) diff --git a/test/runtests.jl b/test/runtests.jl index d8a3b68b8..d26c67858 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -183,7 +183,7 @@ end :DefaultApp, :FileHash, :InvalidAuthentication, :InvalidRequestError, :Job, :WorkloadConfig, :JobFile, :JobLogMessage, :JobReference, :JobStatus, :JuliaHubConnectionError, :JuliaHubError, - :JuliaHubException, + :JuliaHubException, :InvalidJuliaHubVersion, :Limit, :NodeSpec, :PackageApp, :PackageJob, :Unlimited, :PermissionError, :script, :Secret, :UserApp, :application, :applications, :authenticate, @@ -198,6 +198,8 @@ end :nodespec, :nodespecs, :reauthenticate!, :submit_job, :update_dataset, :upload_dataset, :wait_job, :request, + :ProjectReference, 
:ProjectNotSetError, :DatasetProjectLink, + :project_dataset, :project_datasets, :upload_project_dataset, ]) extra_public_symbols = setdiff(public_symbols, expected_public_symbols) isempty(extra_public_symbols) || @warn """ From 5145e87920235a11045f80de21c1851badc94893 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Tue, 18 Mar 2025 19:37:11 +1300 Subject: [PATCH 16/31] fixes & tests for auth --- src/authentication.jl | 31 +++++++++++-------- test/authentication.jl | 70 +++++++++++++++++++++++++++++++++++++++--- test/mocking.jl | 13 +++++--- 3 files changed, 91 insertions(+), 23 deletions(-) diff --git a/src/authentication.jl b/src/authentication.jl index 247c054ad..a5125bdce 100644 --- a/src/authentication.jl +++ b/src/authentication.jl @@ -50,7 +50,7 @@ mutable struct Authentication tokenpath::Union{AbstractString, Nothing}=nothing, email::Union{AbstractString, Nothing}=nothing, expires::Union{Integer, Nothing}=nothing, - project_uuid::Union{UUIDs.UUID, Nothing}=nothing, + project_id::Union{UUIDs.UUID, Nothing}=nothing, ) # The authentication() function should take care of sanitizing the inputs here, # so it is fine to just error() here. @@ -61,7 +61,7 @@ mutable struct Authentication @warn "Invalid auth.toml token path passed to Authentication, ignoring." tokenpath tokenpath = nothing end - new(server, username, token, project_uuid, api_version, tokenpath, email, expires) + new(server, username, token, project_id, api_version, tokenpath, email, expires) end end @@ -232,10 +232,14 @@ This can be set by passing the optional `project` argument, which works as follo """ function authenticate end -function authenticate(server::AbstractString, token::Union{AbstractString, Secret}) +function authenticate( + server::AbstractString, token::Union{AbstractString, Secret}; + project::Union{AbstractString, UUIDs.UUID, Nothing, Missing}=missing, +) auth = _authentication( _juliahub_uri(server); token=isa(token, Secret) ? 
token : Secret(token), + project_id=_juliahub_project(project), ) global __AUTH__[] = auth return auth @@ -256,9 +260,9 @@ function authenticate( ), ) end - project_uuid = _normalize_project(project) + project_id = _juliahub_project(project) server_uri = _juliahub_uri(server) - auth = Mocking.@mock _authenticate(server_uri; force, maxcount, hook, project_uuid) + auth = Mocking.@mock _authenticate(server_uri; force, maxcount, hook, project_id) global __AUTH__[] = auth return auth end @@ -291,14 +295,14 @@ end function _authenticate( server_uri::URIs.URI; force::Bool, maxcount::Integer, hook::Union{Base.Callable, Nothing}, - project_uuid::Union{UUID, Nothing}, + project_id::Union{UUID, Nothing}, ) isnothing(hook) || PkgAuthentication.register_open_browser_hook(hook) try # _authenticate either returns a valid token, or throws auth_toml = _authenticate_retry(string(server_uri), 1; force, maxcount) # Note: _authentication may throw, which gets passed on to the user - _authentication(server_uri; project_uuid, auth_toml...) + _authentication(server_uri; project_id, auth_toml...) finally isnothing(hook) || PkgAuthentication.clear_open_browser_hook() end @@ -371,7 +375,7 @@ function _authentication( email::Union{AbstractString, Nothing}=nothing, username::Union{AbstractString, Nothing}=nothing, tokenpath::Union{AbstractString, Nothing}=nothing, - project_uuid::Union{UUID, Nothing}=nothing, + project_id::Union{UUID, Nothing}=nothing, ) # If something goes badly wrong in _get_api_information, it may throw. We won't really # be able to proceed, since we do not know what JuliaHub APIs to use, so we need to @@ -409,12 +413,12 @@ function _authentication( end return Authentication( server, api.api_version, username, token; - email, expires, tokenpath, project_uuid, + email, expires, tokenpath, project_id, ) end _authentication(server::AbstractString; kwargs...) = _authentication(URIs.URI(server); kwargs...) 
-function _normalize_project( +function _juliahub_project( project::Union{AbstractString, UUIDs.UUID, Nothing, Missing} )::Union{UUID, Nothing} if ismissing(project) @@ -426,7 +430,7 @@ function _normalize_project( return project elseif isa(project, AbstractString) project_uuid = tryparse(UUIDs.UUID, project) - if isnothing(project) + if isnothing(project_uuid) throw( ArgumentError( "Invalid project_id passed to Authentication() - not a UUID: $(project)" @@ -476,7 +480,8 @@ The `force`, `maxcount` and `hook` are relevant for interactive authentication, same way as in the [`authenticate`](@ref) function. This is mostly meant to be used to re-acquire authentication tokens in long-running sessions, where -the initial authentication token may have expired. +the initial authentication token may have expired. If the original `auth` object was authenticated +in the context of a project (i.e. `.project_id` is set), the project association will be retained. As [`Authentication`](@ref) objects are mutable, the token will be updated in all contexts where the reference to the [`Authentication`](@ref) has been passed to. @@ -534,7 +539,7 @@ function reauthenticate!( end end @debug "reauthenticate! 
-- calling PkgAuthentication" auth.server - new_auth = _authenticate(auth.server; force, maxcount, hook) + new_auth = _authenticate(auth.server; force, maxcount, hook, project_id=auth.project_id) if new_auth.username != auth.username throw( AuthenticationError( diff --git a/test/authentication.jl b/test/authentication.jl index 3947eb169..40f8389ea 100644 --- a/test/authentication.jl +++ b/test/authentication.jl @@ -1,22 +1,45 @@ +@testset "_juliahub_project" begin + uuid1 = "80c74bbd-fd5a-4f99-a647-0eec08183ed4" + uuid2 = "24d0f8a7-4c3f-4168-aef4-e49248f3cb40" + withenv("JULIAHUB_PROJECT_UUID" => nothing) do + @test JuliaHub._juliahub_project(uuid1) == UUIDs.UUID(uuid1) + @test_throws ArgumentError JuliaHub._juliahub_project("invalid") + @test JuliaHub._juliahub_project(nothing) === nothing + @test JuliaHub._juliahub_project(missing) === nothing + end + withenv("JULIAHUB_PROJECT_UUID" => uuid1) do + @test JuliaHub._juliahub_project(uuid2) == UUIDs.UUID(uuid2) + @test_throws ArgumentError JuliaHub._juliahub_project("invalid") + @test JuliaHub._juliahub_project(nothing) === nothing + @test JuliaHub._juliahub_project(missing) === UUIDs.UUID(uuid1) + end +end + @testset "JuliaHub.authenticate()" begin empty!(MOCK_JULIAHUB_STATE) Mocking.apply(mocking_patch) do - withenv("JULIA_PKG_SERVER" => nothing) do + withenv("JULIA_PKG_SERVER" => nothing, "JULIAHUB_PROJECT_UUID" => nothing) do @test_throws JuliaHub.AuthenticationError JuliaHub.authenticate() @test JuliaHub.authenticate("https://juliahub.example.org") isa JuliaHub.Authentication @test JuliaHub.authenticate("juliahub.example.org") isa JuliaHub.Authentication end - withenv("JULIA_PKG_SERVER" => "juliahub.example.org") do + withenv("JULIA_PKG_SERVER" => "juliahub.example.org", "JULIAHUB_PROJECT_UUID" => nothing) do @test JuliaHub.authenticate() isa JuliaHub.Authentication end - withenv("JULIA_PKG_SERVER" => "https://juliahub.example.org") do + withenv( + "JULIA_PKG_SERVER" => "https://juliahub.example.org", 
"JULIAHUB_PROJECT_UUID" => nothing + ) do @test JuliaHub.authenticate() isa JuliaHub.Authentication end - # Conflicting declarations, argument takes precendence - withenv("JULIA_PKG_SERVER" => "https://juliahub-one.example.org") do + # Conflicting declarations, explicit argument takes precedence + withenv( + "JULIA_PKG_SERVER" => "https://juliahub-one.example.org", + "JULIAHUB_PROJECT_UUID" => nothing, + ) do auth = JuliaHub.authenticate("https://juliahub-two.example.org") @test auth isa JuliaHub.Authentication @test auth.server == URIs.URI("https://juliahub-two.example.org") + @test auth.project_id === nothing # check_authentication MOCK_JULIAHUB_STATE[:invalid_authentication] = false @test JuliaHub.check_authentication(; auth) === true @@ -24,6 +47,24 @@ @test JuliaHub.check_authentication(; auth) === false delete!(MOCK_JULIAHUB_STATE, :invalid_authentication) end + + # Projects integration + uuid1 = "80c74bbd-fd5a-4f99-a647-0eec08183ed4" + uuid2 = "24d0f8a7-4c3f-4168-aef4-e49248f3cb40" + withenv( + "JULIA_PKG_SERVER" => nothing, + "JULIAHUB_PROJECT_UUID" => uuid1, + ) do + auth = JuliaHub.authenticate("https://juliahub.example.org") + @test auth.server == URIs.URI("https://juliahub.example.org") + @test auth.project_id === UUIDs.UUID(uuid1) + auth = JuliaHub.authenticate("https://juliahub.example.org"; project=uuid2) + @test auth.server == URIs.URI("https://juliahub.example.org") + @test auth.project_id === UUIDs.UUID(uuid2) + auth = JuliaHub.authenticate("https://juliahub.example.org"; project=nothing) + @test auth.server == URIs.URI("https://juliahub.example.org") + @test auth.project_id === nothing + end end end @@ -141,6 +182,25 @@ end @test a._email === nothing @test a._expires === nothing end + # Projects integration + # The JuliaHub.authenticate(server, token) method also takes the `project` + # keyword, and also falls back to the JULIAHUB_PROJECT_UUID. 
+ uuid1 = "80c74bbd-fd5a-4f99-a647-0eec08183ed4" + uuid2 = "24d0f8a7-4c3f-4168-aef4-e49248f3cb40" + withenv( + "JULIA_PKG_SERVER" => nothing, + "JULIAHUB_PROJECT_UUID" => uuid1, + ) do + auth = JuliaHub.authenticate(server, token) + @test auth.server == URIs.URI("https://juliahub.example.org") + @test auth.project_id === UUIDs.UUID(uuid1) + auth = JuliaHub.authenticate(server, token; project=uuid2) + @test auth.server == URIs.URI("https://juliahub.example.org") + @test auth.project_id === UUIDs.UUID(uuid2) + auth = JuliaHub.authenticate(server, token; project=nothing) + @test auth.server == URIs.URI("https://juliahub.example.org") + @test auth.project_id === nothing + end # On old instances, we handle if /api/v1 404s MOCK_JULIAHUB_STATE[:auth_v1_status] = 404 let a = JuliaHub.authenticate(server, token) diff --git a/test/mocking.jl b/test/mocking.jl index 7353cd9ff..0be3c9ab9 100644 --- a/test/mocking.jl +++ b/test/mocking.jl @@ -23,10 +23,13 @@ end # Set up a mock authentication so that the __auth__() fallbacks would work and use this. const MOCK_USERNAME = "username" -mockauth(server_uri) = JuliaHub.Authentication( - server_uri, JuliaHub._MISSING_API_VERSION, MOCK_USERNAME, JuliaHub.Secret("") -) -JuliaHub.__AUTH__[] = mockauth(URIs.URI("https://juliahub.com")) +function mockauth(server_uri; project_id, kwargs...) + JuliaHub.Authentication( + server_uri, JuliaHub._MISSING_API_VERSION, MOCK_USERNAME, JuliaHub.Secret(""); + project_id, + ) +end +JuliaHub.__AUTH__[] = mockauth(URIs.URI("https://juliahub.com"); project_id=nothing) # The following Mocking.jl patches _rest_request, so the the rest calls would have fixed # reponses. @@ -69,7 +72,7 @@ mocking_patch = [ ), Mocking.@patch( function JuliaHub._authenticate(server_uri; kwargs...) - return mockauth(server_uri) + return mockauth(server_uri; kwargs...) 
end ), Mocking.@patch( From ac471011d78cea662901c68c45e5038baa4252d7 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Wed, 19 Mar 2025 19:54:49 +1300 Subject: [PATCH 17/31] add unit tests for listing APIs --- src/authentication.jl | 7 +- src/projects.jl | 25 +++++-- test/mocking.jl | 52 ++++++++++--- test/projects.jl | 170 ++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 9 +++ 5 files changed, 245 insertions(+), 18 deletions(-) create mode 100644 test/projects.jl diff --git a/src/authentication.jl b/src/authentication.jl index de26087df..946005e25 100644 --- a/src/authentication.jl +++ b/src/authentication.jl @@ -464,9 +464,10 @@ _authentication(server::AbstractString; kwargs...) = _authentication(URIs.URI(se function _juliahub_project( project::Union{AbstractString, UUIDs.UUID, Nothing, Missing} )::Union{UUID, Nothing} - if ismissing(project) - project = get(ENV, "JULIAHUB_PROJECT_UUID", nothing) - end + project = coalesce( + project, + get(ENV, "JULIAHUB_PROJECT_UUID", nothing), + ) if isnothing(project) return nothing elseif isa(project, UUIDs.UUID) diff --git a/src/projects.jl b/src/projects.jl index 3131f76b8..2bcf7254f 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -59,7 +59,17 @@ end Looks up the specified dataset among the datasets attached to the project, returning a [`Dataset`](@ref) object, or throwing an [`InvalidRequestError`](@ref) if the project -does not have the dataset attached. +does not have such dataset attached. + +!!! note "Implicit dataset owner" + + When passing just the dataset name for `dataset` (i.e. `<: AbstractString`), then, just + like for the non-project [`JuliaHub.dataset`](@ref) function, it is assumed that the owner + of the dataset should be the currently authenticated user. + + However, a project may have multiple datasets with the same name attached to it, if they are + owned by different users. The best practice when accessing datasets in the context of projects is + to fully specify their name (i.e. 
also include the username). $(_DOCS_nondynamic_datasets_object_warning) """ @@ -118,15 +128,17 @@ end Returns the list of datasets attached to the project, as a list of [`Dataset`](@ref) objects. If the project is not explicitly specified, it uses the project of the authentication object. + +May throw a [`ProjectNotSetError`](@ref). Will throw an [`InvalidRequestError`](@ref) if the currently +authenticated user does not have access to the project or the project does not exist. """ function project_datasets end function project_datasets(; auth::Authentication=__auth__()) - project_id = auth.project_id - if isnothing(project_id) - throw(ArgumentError("Not authenticated in the context of a project.")) + if isnothing(auth.project_id) + throw(ProjectNotSetError()) end - return _project_datasets(auth, project_id) + return _project_datasets(auth, auth.project_id::UUID) end function project_datasets(project::AbstractString; auth::Authentication=__auth__()) @@ -138,6 +150,7 @@ function project_datasets(project::AbstractString; auth::Authentication=__auth__ end function _project_datasets(auth::Authentication, project::UUIDs.UUID) + _assert_projects_enabled(auth) r = JuliaHub._restcall( auth, :GET, ("datasets",), nothing; query=(; project=string(project)), @@ -195,7 +208,7 @@ function upload_project_dataset( r = _open_dataset_version(auth, ds.uuid, project_uuid) if r.status in (400, 403, 404) # These response codes indicate a problem with the request - msg = "Unable to upload to dataset ($(ds.owner), $(ds.name)): $(r.json) (code: $(r.status))" + msg = "Unable to upload to dataset ($(ds.owner), $(ds.name)): $(r.body) (code: $(r.status))" throw(InvalidRequestError(msg)) elseif r.status != 200 # Other response codes indicate a backend failure diff --git a/test/mocking.jl b/test/mocking.jl index 0be3c9ab9..29feb3ef6 100644 --- a/test/mocking.jl +++ b/test/mocking.jl @@ -23,13 +23,23 @@ end # Set up a mock authentication so that the __auth__() fallbacks would work and use this.
const MOCK_USERNAME = "username" -function mockauth(server_uri; project_id, kwargs...) +function mockauth( + server_uri; + project_id::Union{UUIDs.UUID, Nothing}, + api_version::VersionNumber, + kwargs..., +) JuliaHub.Authentication( - server_uri, JuliaHub._MISSING_API_VERSION, MOCK_USERNAME, JuliaHub.Secret(""); + server_uri, api_version, MOCK_USERNAME, JuliaHub.Secret(""); project_id, ) end -JuliaHub.__AUTH__[] = mockauth(URIs.URI("https://juliahub.com"); project_id=nothing) +const DEFAULT_GLOBAL_MOCK_AUTH = mockauth( + URIs.URI("https://juliahub.com"); + api_version=JuliaHub._MISSING_API_VERSION, + project_id=nothing, +) +JuliaHub.__AUTH__[] = DEFAULT_GLOBAL_MOCK_AUTH # The following Mocking.jl patches _rest_request, so the the rest calls would have fixed # reponses. @@ -72,7 +82,12 @@ mocking_patch = [ ), Mocking.@patch( function JuliaHub._authenticate(server_uri; kwargs...) - return mockauth(server_uri; kwargs...) + project_id = get(kwargs, :project_id, nothing) + return mockauth( + server_uri; + api_version=JuliaHub._MISSING_API_VERSION, + project_id, + ) end ), Mocking.@patch( @@ -138,7 +153,7 @@ const MOCK_JULIAHUB_DEFAULT_JOB_FILES = Any[ function _restcall_mocked(method, url, headers, payload; query) GET_JOB_REGEX = r"api/rest/jobs/([a-z0-9-]+)" DATASET_REGEX = r"user/datasets/([A-Za-z0-9%-]+)" - DATASET_VERSIONS_REGEX = r"user/datasets/([A-Za-z0-9%-]+)/versions" + DATASET_VERSIONS_REGEX = r"(user/)?datasets/([A-Za-z0-9%-]+)/versions" # MOCK_JULIAHUB_STATE[:existing_datasets], if set, must be mutable (i.e. Vector), since # new dataset creation requests will push! to it. # @@ -335,6 +350,9 @@ function _restcall_mocked(method, url, headers, payload; query) Dict("message" => "", "success" => true) |> jsonresponse(200) end elseif (method == :GET) && endswith(url, "datasets") + # Note: query will be `nothing` if it's unset in _restcall, so we need + # to handle that case too. 
+ project_uuid = get(something(query, (;)), :project, nothing) datasets = Dict[] for dataset_name in existing_datasets d = _dataset_json( @@ -344,6 +362,7 @@ function _restcall_mocked(method, url, headers, payload; query) get(MOCK_JULIAHUB_STATE, :dataset_version_sizes, nothing), endswith(dataset_name, "/example-dataset") ? [57, 331] : [57], ), + project_uuid, ) push!(datasets, d) end @@ -351,6 +370,7 @@ function _restcall_mocked(method, url, headers, payload; query) d = _dataset_json( dataset_name; version_sizes=(dataset_name == "example-dataset") ? [57, 331] : [57], + project_uuid, ) d["owner"]["username"] = nothing push!(datasets, d) @@ -392,8 +412,11 @@ function _restcall_mocked(method, url, headers, payload; query) Dict("repo_id" => string(UUIDs.uuid4())) |> jsonresponse(200) end elseif (method == :POST) && endswith(url, DATASET_VERSIONS_REGEX) - dataset = URIs.unescapeuri(match(DATASET_VERSIONS_REGEX, url)[1]) - if isnothing(payload) + dataset, is_user = let m = match(DATASET_VERSIONS_REGEX, url) + URIs.unescapeuri(m[2]), m[1] == "user/" + end + payload = JSON.parse(something(payload, "{}")) + if isempty(payload) || !haskey(payload, "action") if "$(MOCK_USERNAME)/$(dataset)" in existing_datasets Dict{String, Any}( "location" => Dict{String, Any}( @@ -416,7 +439,6 @@ function _restcall_mocked(method, url, headers, payload; query) JuliaHub._RESTResponse(404, "Dataset $(dataset) does not exist") end else - payload = JSON.parse(payload) @assert payload["action"] == "close" dataset = payload["name"] Dict{String, Any}( @@ -698,7 +720,7 @@ function _auth_apiv1_mocked() end d = Dict{String, Any}( "timezone" => Dict{String, Any}("abbreviation" => "Etc/UTC", "utc_offset" => "+00:00"), - "api_version" => "0.0.1", + "api_version" => get(MOCK_JULIAHUB_STATE, :auth_v1_api_version, "0.0.1"), ) username = get(MOCK_JULIAHUB_STATE, :auth_v1_username, MOCK_USERNAME) if !isnothing(username) @@ -724,11 +746,22 @@ end function _dataset_json( dataset_name::AbstractString; + 
project_uuid=nothing, params=Dict(), version_sizes=[], ) zerotime = TimeZones.ZonedDateTime("2022-10-12T05:39:42.906+00:00") username, dataset = string.(split(dataset_name, '/'; limit=2)) + project = if !isnothing(project_uuid) + Dict{String, Any}( + "project" => Dict( + "project_id" => project_uuid, + "is_writable" => false, + ), + ) + else + Dict{String, Any}() + end return Dict{String, Any}( "id" => string(uuidhash(dataset_name)), "name" => dataset, @@ -768,5 +801,6 @@ function _dataset_json( "lastModified" => "2022-10-12T05:39:42.906", "downloadURL" => "", "credentials_url" => "...", + project..., ) end diff --git a/test/projects.jl b/test/projects.jl new file mode 100644 index 000000000..4da46ce8c --- /dev/null +++ b/test/projects.jl @@ -0,0 +1,170 @@ +# We'll construct 3 Authentication objects that we can use +# later in the tests. +empty!(MOCK_JULIAHUB_STATE) +project_auth_0 = DEFAULT_GLOBAL_MOCK_AUTH +project_auth_1 = mockauth( + URIs.URI("https://juliahub.example.org"); api_version=v"0.0.1", + project_id=UUIDs.UUID("00000000-0000-0000-0000-000000000001"), +) +project_auth_2 = mockauth( + URIs.URI("https://juliahub.example.org"); api_version=v"0.2.0", + project_id=UUIDs.UUID("00000000-0000-0000-0000-000000000002"), +) +@testset "project_auth_*" begin + @test project_auth_0.project_id === nothing + @test project_auth_0._api_version === v"0.0.0-legacy" + + @test project_auth_1.project_id === UUIDs.UUID("00000000-0000-0000-0000-000000000001") + @test project_auth_1._api_version === v"0.0.1" + + @test project_auth_2.project_id === UUIDs.UUID("00000000-0000-0000-0000-000000000002") + @test project_auth_2._api_version === v"0.2.0" +end + +@testset "_project_uuid()" begin + ref_uuid = UUIDs.UUID("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") + @testset "project_auth_0" begin + @test_throws JuliaHub.ProjectNotSetError JuliaHub._project_uuid(project_auth_0, nothing) + @test_throws ArgumentError JuliaHub._project_uuid(project_auth_0, "1234") + 
JuliaHub._project_uuid(project_auth_0, string(ref_uuid)) === ref_uuid + JuliaHub._project_uuid(project_auth_0, ref_uuid) === ref_uuid + end + @testset "project_auth_1" begin + @test JuliaHub._project_uuid(project_auth_1, nothing) === project_auth_1.project_id + @test_throws ArgumentError JuliaHub._project_uuid(project_auth_1, "1234") + JuliaHub._project_uuid(project_auth_1, string(ref_uuid)) === ref_uuid + JuliaHub._project_uuid(project_auth_1, ref_uuid) === ref_uuid + end +end + +# We'll use the project_datasets() function to test the auth fallback and +# auth handling. +@testset "JuliaHub.project_datasets()" begin + empty!(MOCK_JULIAHUB_STATE) + Mocking.apply(mocking_patch) do + @testset "auth" begin + JuliaHub.__AUTH__[] = project_auth_0 + @test_throws JuliaHub.ProjectNotSetError JuliaHub.project_datasets() + @test_throws JuliaHub.ProjectNotSetError JuliaHub.project_datasets(; + auth=project_auth_0 + ) + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub.project_datasets(; + auth=project_auth_1 + ) + @test JuliaHub.project_datasets(; auth=project_auth_2) isa Vector{JuliaHub.Dataset} + + JuliaHub.__AUTH__[] = project_auth_1 + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub.project_datasets() + @test_throws JuliaHub.ProjectNotSetError JuliaHub.project_datasets(; + auth=project_auth_0 + ) + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub.project_datasets(; + auth=project_auth_1 + ) + @test JuliaHub.project_datasets(; auth=project_auth_2) isa Vector{JuliaHub.Dataset} + + JuliaHub.__AUTH__[] = project_auth_2 + @test JuliaHub.project_datasets() isa Vector{JuliaHub.Dataset} + @test_throws JuliaHub.ProjectNotSetError JuliaHub.project_datasets(; + auth=project_auth_0 + ) + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub.project_datasets(; + auth=project_auth_1 + ) + @test JuliaHub.project_datasets(; auth=project_auth_2) isa Vector{JuliaHub.Dataset} + end + + @testset "datasets" begin + datasets = JuliaHub.project_datasets() + @test length(datasets) 
=== 3 + @testset "dataset: $(dataset.name)" for dataset in datasets + @test dataset isa JuliaHub.Dataset + @test dataset.project isa JuliaHub.DatasetProjectLink + @test dataset.project.uuid === project_auth_2.project_id + @test dataset.project.is_writable === false + end + end + end +end + +@testset "JuliaHub.project_dataset()" begin + empty!(MOCK_JULIAHUB_STATE) + Mocking.apply(mocking_patch) do + @testset "auth" begin + JuliaHub.__AUTH__[] = project_auth_0 + @test_throws JuliaHub.ProjectNotSetError JuliaHub.project_dataset("example-dataset") + @test_throws JuliaHub.ProjectNotSetError JuliaHub.project_dataset("example-dataset"; + auth=project_auth_0, + ) + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub.project_dataset("example-dataset"; + auth=project_auth_1, + ) + @test JuliaHub.project_dataset("example-dataset"; auth=project_auth_2) isa + JuliaHub.Dataset + + JuliaHub.__AUTH__[] = project_auth_1 + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub.project_dataset("example-dataset") + @test_throws JuliaHub.ProjectNotSetError JuliaHub.project_dataset("example-dataset"; + auth=project_auth_0, + ) + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub.project_dataset("example-dataset"; + auth=project_auth_1, + ) + @test JuliaHub.project_dataset("example-dataset"; auth=project_auth_2) isa + JuliaHub.Dataset + + JuliaHub.__AUTH__[] = project_auth_2 + @test JuliaHub.project_dataset("example-dataset") isa JuliaHub.Dataset + @test_throws JuliaHub.ProjectNotSetError JuliaHub.project_dataset("example-dataset"; + auth=project_auth_0, + ) + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub.project_dataset("example-dataset"; + auth=project_auth_1, + ) + @test JuliaHub.project_dataset("example-dataset"; auth=project_auth_2) isa + JuliaHub.Dataset + end + + @testset "datasets" begin + let dataset = JuliaHub.project_dataset("example-dataset") + @test dataset.name == "example-dataset" + @test dataset.owner == "username" + @test dataset.dtype == "Blob" + @test 
dataset.description == "An example dataset" + + @test dataset.project isa JuliaHub.DatasetProjectLink + @test dataset.project.uuid === project_auth_2.project_id + @test dataset.project.is_writable === false + end + + let dataset = JuliaHub.project_dataset(("anotheruser", "publicdataset")) + @test dataset.name == "publicdataset" + @test dataset.owner == "anotheruser" + @test dataset.dtype == "Blob" + @test dataset.description == "An example dataset" + + @test dataset.project isa JuliaHub.DatasetProjectLink + @test dataset.project.uuid === project_auth_2.project_id + @test dataset.project.is_writable === false + end + + dataset_noproject = JuliaHub.dataset("example-dataset") + @test dataset_noproject.project === nothing + let dataset = JuliaHub.project_dataset(dataset_noproject) + @test dataset.name == "example-dataset" + @test dataset.owner == "username" + @test dataset.dtype == "Blob" + @test dataset.description == "An example dataset" + + @test dataset.project isa JuliaHub.DatasetProjectLink + @test dataset.project.uuid === project_auth_2.project_id + @test dataset.project.is_writable === false + end + + @test_throws JuliaHub.InvalidRequestError JuliaHub.project_dataset("no-such-dataset") + end + end +end + +# We'll restore the default (non-project) global auth +JuliaHub.__AUTH__[] = DEFAULT_GLOBAL_MOCK_AUTH diff --git a/test/runtests.jl b/test/runtests.jl index d26c67858..6028b48f8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -93,6 +93,12 @@ function list_datasets_prefix(prefix, args...; kwargs...) end @testset "JuliaHub.jl" begin + # JuliaHub.jl's behavior can be influenced by these two environment + # variables, so we explicitly unset them, just in case, to ensure that the + # tests run consistently. + delete!(ENV, "JULIA_PKG_SERVER") + delete!(ENV, "JULIAHUB_PROJECT_UUID") + # Just to make sure the logic within is_enabled() is correct. 
@testset "is_enabled" begin # We need to unset the environment variables read by extra_enabled_live_tests() @@ -245,6 +251,9 @@ end @testset "Jobs" begin include("jobs.jl") end + @testset "Projects" begin + include("projects.jl") + end @testset "_PackageBundler" begin include("packagebundler.jl") end From 52dee3cffa03f147b34391f8f204a192d50364f5 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Wed, 19 Mar 2025 20:40:43 +1300 Subject: [PATCH 18/31] basic upload_project_dataset unit tests --- test/mocking.jl | 8 ++++++-- test/projects.jl | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/test/mocking.jl b/test/mocking.jl index 29feb3ef6..be75961e5 100644 --- a/test/mocking.jl +++ b/test/mocking.jl @@ -417,7 +417,12 @@ function _restcall_mocked(method, url, headers, payload; query) end payload = JSON.parse(something(payload, "{}")) if isempty(payload) || !haskey(payload, "action") - if "$(MOCK_USERNAME)/$(dataset)" in existing_datasets + is_existing_dataset = if is_user + "$(MOCK_USERNAME)/$(dataset)" in existing_datasets + else + UUIDs.UUID(dataset) in uuidhash.(existing_datasets) + end + if is_existing_dataset Dict{String, Any}( "location" => Dict{String, Any}( "bucket" => "", @@ -440,7 +445,6 @@ function _restcall_mocked(method, url, headers, payload; query) end else @assert payload["action"] == "close" - dataset = payload["name"] Dict{String, Any}( "size_bytes" => 8124, "dataset_id" => "c1488c3f-0910-4f73-9c40-14f3c7a8696b", diff --git a/test/projects.jl b/test/projects.jl index 4da46ce8c..312958de9 100644 --- a/test/projects.jl +++ b/test/projects.jl @@ -166,5 +166,27 @@ end end end +@testset "JuliaHub.upload_project_dataset()" begin + Mocking.apply(mocking_patch) do + @test JuliaHub.upload_project_dataset("example-dataset", @__FILE__) isa JuliaHub.Dataset + @test JuliaHub.upload_project_dataset(("anotheruser", "publicdataset"), @__FILE__) isa + JuliaHub.Dataset + @test_throws JuliaHub.InvalidRequestError 
JuliaHub.upload_project_dataset( + ("non-existent-user", "example-dataset"), @__FILE__ + ) isa JuliaHub.Dataset + @test_throws JuliaHub.InvalidRequestError JuliaHub.upload_project_dataset( + "no-such-dataset", @__FILE__ + ) + dataset_noproject = JuliaHub.dataset("example-dataset") + @test dataset_noproject.project === nothing + dataset = JuliaHub.upload_project_dataset(dataset_noproject, @__FILE__) + @test dataset isa JuliaHub.Dataset + @test dataset.project isa JuliaHub.DatasetProjectLink + @test dataset.project.uuid === project_auth_2.project_id + @test dataset.project.is_writable === false + @test JuliaHub.upload_project_dataset(dataset_noproject, @__FILE__) isa JuliaHub.Dataset + end +end + # We'll restore the default (non-project) global auth JuliaHub.__AUTH__[] = DEFAULT_GLOBAL_MOCK_AUTH From b7ad5811dca0577598ec7207dd879cc390075a59 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Thu, 20 Mar 2025 17:36:17 +1300 Subject: [PATCH 19/31] add live tests for projects --- src/projects.jl | 28 +++---- test/datasets-live.jl | 33 +++----- test/projects-live.jl | 173 ++++++++++++++++++++++++++++++++++++++++++ test/projects.jl | 55 +++++++++++--- test/runtests-live.jl | 23 ++++++ 5 files changed, 266 insertions(+), 46 deletions(-) create mode 100644 test/projects-live.jl diff --git a/src/projects.jl b/src/projects.jl index 2bcf7254f..3ad753143 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -132,21 +132,15 @@ If the project is not explicitly specified, it uses the project of the authentic May throw a [`ProjectNotSetError`](@ref). Will throw an [`InvalidRequestError`] if the currently authenticated user does not have access to the project or the project does not exists. 
""" -function project_datasets end - -function project_datasets(; auth::Authentication=__auth__()) - if isnothing(auth.project_id) - throw(ProjectNotSetError()) - end - return _project_datasets(auth, auth.project_id::UUID) -end - -function project_datasets(project::AbstractString; auth::Authentication=__auth__()) - project_uuid = tryparse(UUIDs.UUID, project) +function project_datasets( + project::Union{ProjectReference, Nothing}=nothing; + auth::Authentication=__auth__(), +) + project_uuid = _project_uuid(auth, project) if isnothing(project_uuid) throw(ArgumentError("`project` must be a UUID, got '$(project)'")) end - return project_datasets(project_uuid; auth) + return _project_datasets(auth, project_uuid) end function _project_datasets(auth::Authentication, project::UUIDs.UUID) @@ -169,10 +163,18 @@ function _project_datasets(auth::Authentication, project::UUIDs.UUID) end """ - JuliaHub.upload_project_dataset(dataset::DatasetReference, local_path; [auth,] kwargs...) -> Dataset + JuliaHub.upload_project_dataset( + dataset::DatasetReference, local_path; + progress=true, + [project::ProjectReference], + [auth::Authentication], + ) -> Dataset Uploads a new version of a project-linked dataset. +By default, the new dataset version will be associated with the project of the current authentication +session (if any), but this can be overridden by passing `project`. + !!! 
note "Permissions" Note that in order for this to work, you need to have edit rights on the projects and diff --git a/test/datasets-live.jl b/test/datasets-live.jl index 96ad22ad1..f5b081196 100644 --- a/test/datasets-live.jl +++ b/test/datasets-live.jl @@ -34,13 +34,10 @@ function _get_user_groups(auth::JuliaHub.Authentication)::Vector{String} end end - -TESTDATA = joinpath(@__DIR__, "testdata") -PREFIX = "JuliaHubTest_$(TESTID)" -@info "Uploading test data with prefix: $PREFIX" -blobname, treename = "$(PREFIX)_Blob", "$(PREFIX)_Tree" -weirdnames = string.("$(PREFIX)_", ["foo/bar/baz", "Δεδομένα", "Δε-δο-μέ/να"]) -deletename = "$(PREFIX)_Blob" +@info "Uploading test data with prefix: $TEST_PREFIX" +blobname, treename = "$(TEST_PREFIX)_Blob", "$(TEST_PREFIX)_Tree" +weirdnames = string.("$(TEST_PREFIX)_", ["foo/bar/baz", "Δεδομένα", "Δε-δο-μέ/να"]) +deletename = "$(TEST_PREFIX)_Blob" existing_datasets = JuliaHub.datasets(; auth) @test existing_datasets isa Array @@ -65,14 +62,14 @@ try @test isempty(JuliaHub.datasets("nonexistentpseudouser")) # The datasets generated by these tests should all have a unique prefix - @test isempty(list_datasets_prefix(PREFIX)) + @test isempty(list_datasets_prefix(TEST_PREFIX)) JuliaHub.upload_dataset( blobname, joinpath(TESTDATA, "hi.txt"); description="some blob", tags=["x", "y", "z"], auth, ) - datasets = list_datasets_prefix(PREFIX; auth) + datasets = list_datasets_prefix(TEST_PREFIX; auth) @test length(datasets) == 1 blob_dataset = only(filter(d -> d.name == blobname, datasets)) @test blob_dataset.description == "some blob" @@ -91,7 +88,7 @@ try (auth.username, treename), TESTDATA; description="some tree", tags=["a", "b", "c"], ) - datasets = list_datasets_prefix(PREFIX; auth) + datasets = list_datasets_prefix(TEST_PREFIX; auth) tree_dataset = only(filter(d -> d.name == treename, datasets)) @test length(datasets) == 2 @test tree_dataset.description == "some tree" @@ -267,19 +264,7 @@ try finally for dataset in (blobname, treename, 
deletename, weirdnames...) - try - @info "Deleting dataset: $dataset" - JuliaHub.delete_dataset(dataset; auth) - catch err - if isa(err, JuliaHub.InvalidRequestError) - println("$dataset not deleted: $(err)") - else - @warn "Failed to delete dataset '$dataset'" exception = (err, catch_backtrace()) - if err isa JuliaHub.JuliaHubError && !isnothing(err.exception) - @info "JuliaHubError inner exception" exception = err.exception - end - end - end + _delete_test_dataset(auth, dataset) end end -@test isempty(list_datasets_prefix(PREFIX; auth)) +@test isempty(list_datasets_prefix(TEST_PREFIX; auth)) diff --git a/test/projects-live.jl b/test/projects-live.jl new file mode 100644 index 000000000..eb0b13c30 --- /dev/null +++ b/test/projects-live.jl @@ -0,0 +1,173 @@ +function _api_add_project(auth, name) + body = Dict( + "name" => name, + "product_id" => 1, + "is_simple_mode" => false, + "instance_default_role" => "No Access", + ) + r = JuliaHub._restcall( + auth, + :POST, + ("api", "v1", "projects", "add"), + JSON.json(body); + headers=["Content-Type" => "application/json"], + ) + if r.status != 200 + error("Invalid response (/add): $(r.status)\n$(r.body)") + end + return r.json["project_id"] +end + +function _create_project(auth, name) + project_id = _api_add_project(auth, name) + r = JuliaHub._restcall( + auth, + :POST, + ("api", "v1", "projects", "create", project_id), + nothing; + headers=["Content-Type" => "application/json"], + ) + if r.status != 200 + error("Invalid response (/create): $(r.status)\n$(r.body)") + end + return (; + name, + project_id, + ) +end + +function _attach_dataset(auth, project_id, dataset_id; action="attach", writable=false) + body = [ + Dict( + "dataset" => dataset_id, + "action" => action, + "writable" => writable, + ), + ] + r = JuliaHub._restcall( + auth, + :PATCH, + ("api", "v1", "projects", "datasets", project_id), + JSON.json(body); + headers=["Content-Type" => "application/json"], + ) + if r.status != 200 + error("Invalid response 
(/datasets): $(r.status)\n$(r.body)") + end + return nothing +end + +# Create the projects and datasets +@info "Test project data with prefix: $TEST_PREFIX" +@testset "create project" begin + global project = _create_project(auth, "$(TEST_PREFIX) Datasets") + @test isempty(JuliaHub.project_datasets(project.project_id; auth)) +end + +# Upload a dataset, attach that to the project, and upload a new version to it. +project_dataset_name = "$(TEST_PREFIX)_Project" +try + @testset "upload a test dataset" begin + global project_dataset = JuliaHub.upload_dataset( + project_dataset_name, joinpath(TESTDATA, "hi.txt"); + description="some blob", tags=["x", "y", "z"], + auth, + ) + @test project_dataset.project === nothing + @test length(project_dataset.versions) == 1 + # TODO: add this properly to DatasetVersion? + @test project_dataset._json["versions"][1]["project"] === nothing + @test project_dataset._json["versions"][1]["uploader"]["username"] == auth.username + + # The authentication object we use does not have a project associated with it + @test_throws JuliaHub.ProjectNotSetError JuliaHub.upload_project_dataset( + project_dataset, joinpath(TESTDATA, "hi.txt") + ) + # .. so we need to pass it explicitly. However, at this point, the project + # is not attached. So uploading a new version will fail. 
+ t = @test_throws JuliaHub.InvalidRequestError JuliaHub.upload_project_dataset( + project_dataset, joinpath(TESTDATA, "hi.txt"); project=project.project_id + ) + @test startswith( + t.value.msg, + "Unable to upload to dataset ($(auth.username), $(project_dataset.name))", + ) + @test occursin(project_dataset.name, t.value.msg) + @test occursin("code: 403", t.value.msg) + end + + @testset "attach dataset to project (non-writable)" begin + _attach_dataset(auth, project.project_id, string(project_dataset.uuid)) + + let datasets = JuliaHub.project_datasets(project.project_id; auth) + @test length(datasets) == 1 + @test datasets[1].name == project_dataset_name + @test datasets[1].uuid == project_dataset.uuid + @test datasets[1].project isa JuliaHub.DatasetProjectLink + @test datasets[1].project.uuid === UUIDs.UUID(project.project_id) + @test datasets[1].project.is_writable === false + @test length(datasets[1].versions) == 1 + end + + t = @test_throws JuliaHub.InvalidRequestError JuliaHub.upload_project_dataset( + project_dataset, joinpath(TESTDATA, "hi.txt"); project=project.project_id + ) + @test startswith( + t.value.msg, + "Unable to upload to dataset ($(auth.username), $(project_dataset.name))", + ) + @test occursin(project_dataset.name, t.value.msg) + @test occursin("code: 403", t.value.msg) + end + + @testset "attach dataset to project (writable)" begin + # Mark the dataset writable + _attach_dataset(auth, project.project_id, string(project_dataset.uuid); writable=true) + + let datasets = JuliaHub.project_datasets(project.project_id; auth) + @test length(datasets) == 1 + @test datasets[1].name == project_dataset_name + @test datasets[1].uuid == project_dataset.uuid + @test datasets[1].project isa JuliaHub.DatasetProjectLink + @test datasets[1].project.uuid === UUIDs.UUID(project.project_id) + @test datasets[1].project.is_writable === true + @test length(datasets[1].versions) == 1 + end + + dataset = JuliaHub.upload_project_dataset( + project_dataset, 
joinpath(TESTDATA, "hi.txt"); project=project.project_id + ) + @test dataset.name == project_dataset_name + @test dataset.uuid == project_dataset.uuid + @test dataset.project isa JuliaHub.DatasetProjectLink + @test dataset.project.uuid === UUIDs.UUID(project.project_id) + @test dataset.project.is_writable === true + + @test length(dataset.versions) == 2 + @test dataset._json["versions"][1]["project"] === nothing + @test dataset._json["versions"][1]["uploader"]["username"] == auth.username + @test dataset._json["versions"][2]["project"] == project.project_id + @test dataset._json["versions"][2]["uploader"]["username"] == auth.username + end + + @testset "project_dataset" begin + @test_throws JuliaHub.ProjectNotSetError JuliaHub.project_dataset(project_dataset; auth) + let dataset = JuliaHub.project_dataset( + project_dataset; project=project.project_id, auth + ) + @test dataset.name == project_dataset_name + @test dataset.uuid == project_dataset.uuid + @test dataset.project isa JuliaHub.DatasetProjectLink + @test dataset.project.uuid === UUIDs.UUID(project.project_id) + @test dataset.project.is_writable === true + + @test length(dataset.versions) == 2 + @test dataset._json["versions"][1]["project"] === nothing + @test dataset._json["versions"][1]["uploader"]["username"] == auth.username + @test dataset._json["versions"][2]["project"] == project.project_id + @test dataset._json["versions"][2]["uploader"]["username"] == auth.username + end + end +finally + _delete_test_dataset(auth, project_dataset_name) +end diff --git a/test/projects.jl b/test/projects.jl index 312958de9..a64a529fb 100644 --- a/test/projects.jl +++ b/test/projects.jl @@ -11,14 +11,21 @@ project_auth_2 = mockauth( project_id=UUIDs.UUID("00000000-0000-0000-0000-000000000002"), ) @testset "project_auth_*" begin - @test project_auth_0.project_id === nothing - @test project_auth_0._api_version === v"0.0.0-legacy" - - @test project_auth_1.project_id === UUIDs.UUID("00000000-0000-0000-0000-000000000001") - 
@test project_auth_1._api_version === v"0.0.1" - - @test project_auth_2.project_id === UUIDs.UUID("00000000-0000-0000-0000-000000000002") - @test project_auth_2._api_version === v"0.2.0" + let auth = project_auth_0 + @test auth.project_id === nothing + @test auth._api_version === v"0.0.0-legacy" + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub._assert_projects_enabled(auth) + end + let auth = project_auth_1 + @test auth.project_id === UUIDs.UUID("00000000-0000-0000-0000-000000000001") + @test auth._api_version === v"0.0.1" + @test_throws JuliaHub.InvalidJuliaHubVersion JuliaHub._assert_projects_enabled(auth) + end + let auth = project_auth_2 + @test auth.project_id === UUIDs.UUID("00000000-0000-0000-0000-000000000002") + @test auth._api_version === v"0.2.0" + @test JuliaHub._assert_projects_enabled(auth) === nothing + end end @testset "_project_uuid()" begin @@ -74,7 +81,7 @@ end @test JuliaHub.project_datasets(; auth=project_auth_2) isa Vector{JuliaHub.Dataset} end - @testset "datasets" begin + @testset "default project" begin datasets = JuliaHub.project_datasets() @test length(datasets) === 3 @testset "dataset: $(dataset.name)" for dataset in datasets @@ -84,6 +91,36 @@ end @test dataset.project.is_writable === false end end + + # These tests that we send project_auth_1.project_id to the backend + @testset "explicit project" begin + datasets = JuliaHub.project_datasets( + project_auth_1.project_id; + auth=project_auth_2, + ) + @test length(datasets) === 3 + @testset "dataset: $(dataset.name)" for dataset in datasets + @test dataset isa JuliaHub.Dataset + @test dataset.project isa JuliaHub.DatasetProjectLink + @test dataset.project.uuid === project_auth_1.project_id + @test dataset.project.is_writable === false + end + + # Automatic parsing of string project_ids + datasets = JuliaHub.project_datasets( + string(project_auth_1.project_id); + auth=project_auth_2, + ) + @test length(datasets) === 3 + @testset "dataset: $(dataset.name)" for dataset in datasets + 
@test dataset isa JuliaHub.Dataset + @test dataset.project isa JuliaHub.DatasetProjectLink + @test dataset.project.uuid === project_auth_1.project_id + @test dataset.project.is_writable === false + end + + @test_throws ArgumentError datasets = JuliaHub.project_datasets("foo") + end end end diff --git a/test/runtests-live.jl b/test/runtests-live.jl index 33a6451bd..2341374da 100644 --- a/test/runtests-live.jl +++ b/test/runtests-live.jl @@ -1,6 +1,8 @@ # Can be used to prefix test-related data on the instance (like dataset names) # to avoid clashes with test suites running in parallel. TESTID = Random.randstring(8) +TEST_PREFIX = "JuliaHubTest_$(TESTID)" +TESTDATA = joinpath(@__DIR__, "testdata") # Authenticate the test session JULIAHUB_SERVER = get(ENV, "JULIAHUB_SERVER") do @@ -15,6 +17,22 @@ end @info "Authentication / API version: $(auth._api_version)" extra_enabled_live_tests(; print_info=true) +function _delete_test_dataset(auth, dataset) + try + @info "Deleting dataset: $dataset" + JuliaHub.delete_dataset(dataset; auth) + catch err + if isa(err, JuliaHub.InvalidRequestError) + println("$dataset not deleted: $(err)") + else + @warn "Failed to delete dataset '$dataset'" exception = (err, catch_backtrace()) + if err isa JuliaHub.JuliaHubError && !isnothing(err.exception) + @info "JuliaHubError inner exception" exception = err.exception + end + end + end +end + @testset "JuliaHub.jl LIVE tests" begin @testset "Authentication" begin @test_throws JuliaHub.AuthenticationError("Authentication unsuccessful after 3 tries") JuliaHub.authenticate( @@ -44,6 +62,11 @@ extra_enabled_live_tests(; print_info=true) include("datasets-large-live.jl") end + is_enabled("datasets-projects"; disabled_by_default=true) && + @testset "Large datasets" begin + include("projects-live.jl") + end + if is_enabled("jobs") @testset "JuliaHub Jobs" begin @testset "Basic" begin From 5ebf3ce9f153dcea3a1ad330330b2b7f9f0df921 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Thu, 20 Mar 2025 
17:37:56 +1300 Subject: [PATCH 20/31] add option to run tests --- test/runtests-live.jl | 2 +- test/runtests.jl | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/test/runtests-live.jl b/test/runtests-live.jl index 2341374da..11f72503a 100644 --- a/test/runtests-live.jl +++ b/test/runtests-live.jl @@ -63,7 +63,7 @@ end end is_enabled("datasets-projects"; disabled_by_default=true) && - @testset "Large datasets" begin + @testset "Project-dataset integration" begin include("projects-live.jl") end diff --git a/test/runtests.jl b/test/runtests.jl index 6028b48f8..8f4d138e2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -39,6 +39,9 @@ function extra_enabled_live_tests(; print_info=false) if get(ENV, "JULIAHUBJL_LIVE_EXPOSED_PORT_TESTS", "") == "true" push!(testnames, "jobs-exposed-port") end + if get(ENV, "JULIAHUBJL_LIVE_PROJECTS_TESTS", "") == "true" + push!(testnames, "datasets-projects") + end if print_info && !isempty(testnames) testname_list = join(string.(" - ", testnames), '\n') @info """ From 6aea1a4956281fca455dc82d3684f9879d1c7610 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 24 Mar 2025 16:29:53 +1300 Subject: [PATCH 21/31] format --- test/mocking.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mocking.jl b/test/mocking.jl index 13f669868..be75961e5 100644 --- a/test/mocking.jl +++ b/test/mocking.jl @@ -805,6 +805,6 @@ function _dataset_json( "lastModified" => "2022-10-12T05:39:42.906", "downloadURL" => "", "credentials_url" => "...", - project... 
+ project..., ) end From cca4d61ce0a968a689886143c9e288dc9d48900e Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 24 Mar 2025 16:40:44 +1300 Subject: [PATCH 22/31] docs: use [sources] --- docs/Project.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/Project.toml b/docs/Project.toml index 40ea80183..c2c571a0d 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -12,3 +12,6 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" Changelog = "1" Documenter = "1" DocumenterMermaid = "0.1" + +[sources] +JuliaHub = { path = ".." } From b7d86dcd2b72105758e2e1f5415c3fb799f5bcf3 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 24 Mar 2025 16:50:53 +1300 Subject: [PATCH 23/31] docstrings --- docs/Project.toml | 2 +- docs/make.jl | 16 ++++++++++++++-- src/projects.jl | 34 ++++++++++++++++++++++++++++------ 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index c2c571a0d..68affa9ca 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -10,7 +10,7 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" [compat] Changelog = "1" -Documenter = "1" +Documenter = "1.9" DocumenterMermaid = "0.1" [sources] diff --git a/docs/make.jl b/docs/make.jl index cdff5cc73..261034425 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -9,6 +9,10 @@ import TimeZones ENV["TZ"] = "America/New_York" JuliaHub._LOCAL_TZ[] = TimeZones.localzone() +# Patching of the API responses. Also sets JuliaHub.__AUTH__. +include("../test/mocking.jl") + +# We don't want doctests to interfere with each other DocMeta.setdocmeta!( JuliaHub, :DocTestSetup, quote @@ -18,8 +22,16 @@ DocMeta.setdocmeta!( recursive=true, ) -# Patching of the API responses. Also sets JuliaHub.__AUTH__. -include("../test/mocking.jl") +# For project-related APIs, we need a different authentication object. +# So we set up small setup and teardown functions here too. 
+const DEFAULT_PROJECT_AUTH = mockauth( + URIs.URI("https://juliahub.com"); + api_version=v"0.2.0", + project_id=UUIDs.UUID("cd6c9ee3-d15f-414f-a762-7e1d3faed835"), +) +projectauth_setup!() = JuliaHub.__AUTH__[] = DEFAULT_PROJECT_AUTH +projectauth_teardown!() = JuliaHub.__AUTH__[] = DEFAULT_GLOBAL_MOCK_AUTH + # The following setup function is reused in both at-setup blocks, but also in # doctestsetup. function setup_job_results_file!() diff --git a/src/projects.jl b/src/projects.jl index 3ad753143..fe4eeea6c 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -1,13 +1,13 @@ """ struct ProjectNotSetError <: JuliaHubException -Exception thrown when the authentication object is not set to a project, nor was -an explicit project UUID provided, but the operation requires a project to be -specified. +Exception thrown by a project-related operation that requires a project to be specified, +but neither an explicit project reference was provided, nor was the project set for the +authentication object. """ struct ProjectNotSetError <: JuliaHubException end -function Base.showerror(io::IO, e::ProjectNotSetError) +function Base.showerror(io::IO, ::ProjectNotSetError) print(io, "ProjectNotSetError: authentication object not associated with a project") end @@ -61,14 +61,25 @@ Looks up the specified dataset among the datasets attached to the project, retur [`Dataset`](@ref) object, or throwing an [`InvalidRequestError`](@ref) if the project does not have such dataset attached. +```jldoctest; setup = :(Main.projectauth_setup!()), teardown = :(Main.projectauth_teardown!()) +julia> JuliaHub.project_dataset(("username", "blobtree/example")) +Dataset: blobtree/example (BlobTree) + owner: username + description: An example dataset + versions: 1 + size: 57 bytes + tags: tag1, tag2 + project: cd6c9ee3-d15f-414f-a762-7e1d3faed835 (not writable) +``` + !!! note "Implicit dataset owner" When passing just the dataset name for `dataset` (i.e. 
`<: AbstractString`), then, just like for the non-project [`JuliaHub.dataset`](@ref) function, it is assumed that the owner of the dataset should be the currently authenticated user. - However, a project may have multiple datasets with the same name attached to it, if they are - owned by different users. The best practice when accessing datasets in the context of projects is + However, a project may have multiple datasets with the same name attached to it (if they are + owned by different users). The best practice when accessing datasets in the context of projects is to fully specify their name (i.e. also include the username). $(_DOCS_nondynamic_datasets_object_warning) @@ -131,6 +142,17 @@ If the project is not explicitly specified, it uses the project of the authentic May throw a [`ProjectNotSetError`](@ref). Will throw an [`InvalidRequestError`] if the currently authenticated user does not have access to the project or the project does not exists. + +```jldoctest; setup = :(Main.projectauth_setup!()), teardown = :(Main.projectauth_teardown!()) +julia> JuliaHub.current_authentication() +JuliaHub.Authentication("https://juliahub.com", "username", *****; project_id = "cd6c9ee3-d15f-414f-a762-7e1d3faed835") + +julia> JuliaHub.project_datasets() +3-element Vector{JuliaHub.Dataset}: + JuliaHub.project_dataset(("username", "example-dataset"); project=cd6c9ee3-d15f-414f-a762-7e1d3faed835) + JuliaHub.project_dataset(("anotheruser", "publicdataset"); project=cd6c9ee3-d15f-414f-a762-7e1d3faed835) + JuliaHub.project_dataset(("username", "blobtree/example"); project=cd6c9ee3-d15f-414f-a762-7e1d3faed835) +``` """ function project_datasets( project::Union{ProjectReference, Nothing}=nothing; From bbb6145678ffd1e860d073906b76e67c13655f20 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 24 Mar 2025 19:02:53 +1300 Subject: [PATCH 24/31] auth docs --- docs/make.jl | 13 ++++++++++++ docs/src/guides/authentication.md | 2 ++ src/authentication.jl | 33 
+++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/docs/make.jl b/docs/make.jl index 261034425..255fd095c 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -32,6 +32,19 @@ const DEFAULT_PROJECT_AUTH = mockauth( projectauth_setup!() = JuliaHub.__AUTH__[] = DEFAULT_PROJECT_AUTH projectauth_teardown!() = JuliaHub.__AUTH__[] = DEFAULT_GLOBAL_MOCK_AUTH +function env_setup!(; project=false) + ENV["JULIA_PKG_SERVER"] = "juliahub.com" + if project + ENV["JULIAHUB_PROJECT_UUID"] = "b1a95ba8-43e6-4eb6-b280-3c5cbe0fa0b9" + end + return nothing +end +function env_teardown!() + delete!(ENV, "JULIA_PKG_SERVER") + delete!(ENV, "JULIAHUB_PROJECT_UUID") + return nothing +end + # The following setup function is reused in both at-setup blocks, but also in # doctestsetup. function setup_job_results_file!() diff --git a/docs/src/guides/authentication.md b/docs/src/guides/authentication.md index 3e7984259..a7aac5505 100644 --- a/docs/src/guides/authentication.md +++ b/docs/src/guides/authentication.md @@ -43,6 +43,8 @@ When working with JuliaHub.jl in JuliaHub cloud environment, such as in JuliaHub The `JULIA_PKG_SERVER` should always be correctly set up, and the `auth.toml` file will also be present and up-to-date. This means that [`authenticate`](@ref) should automatically be able to authenticate your session, and you should never be prompted for interactive authentication. +This also extends to JuliaHub Projects support --- if an IDE has been launched within the context of a project, the environment will have the `JULIAHUB_PROJECT_UUID` environment variable set, and the authentication object will be automatically linked to the correct project. + !!! tip "No need to authenticate()" As in a JuliaHub environment everything is already set up and pointing to the correct server, you do not need to call [`authenticate`](@ref) yourself, nor set up `JULIA_PKG_SERVER`. 
diff --git a/src/authentication.jl b/src/authentication.jl index 946005e25..5cf0e07a4 100644 --- a/src/authentication.jl +++ b/src/authentication.jl @@ -229,6 +229,39 @@ This can be set by passing the optional `project` argument, which works as follo When the [`Authentication`](@ref) object is constructed, access to or existence of the specified project is not checked. However, if you attempt any project operations with with such an authentication object, they will fail and throw an error. + +# Examples + +If `JULIA_PKG_SERVER` is set, `authenticate()` will pick it up automatically, +although it can also be overridden by setting the instance hostname explicitly: + +```jldoctest; setup = :(Main.env_setup!()), teardown = :(Main.env_teardown!()) +julia> ENV["JULIA_PKG_SERVER"] +"juliahub.com" + +julia> JuliaHub.authenticate() +JuliaHub.Authentication("https://juliahub.com", "username", *****) + +julia> JuliaHub.authenticate("mycompany.juliahub.com") +JuliaHub.Authentication("https://mycompany.juliahub.com", "username", *****) +``` + +If `JULIAHUB_PROJECT_UUID` is set to point to a JuliaHub Project (e.g. 
in JuliaHub cloud +environments), it will also get automatically picked up, but can also be overridden: + +```jldoctest; setup = :(Main.env_setup!(; project=true)), teardown = :(Main.env_teardown!()) +julia> ENV["JULIAHUB_PROJECT_UUID"] +"b1a95ba8-43e6-4eb6-b280-3c5cbe0fa0b9" + +julia> JuliaHub.authenticate() +JuliaHub.Authentication("https://juliahub.com", "username", *****; project_id = "b1a95ba8-43e6-4eb6-b280-3c5cbe0fa0b9") + +julia> JuliaHub.authenticate(; project = "7ed96f69-a765-4de6-ac00-04a38684ce1c") +JuliaHub.Authentication("https://juliahub.com", "username", *****; project_id = "7ed96f69-a765-4de6-ac00-04a38684ce1c") + +julia> JuliaHub.authenticate(; project = nothing) +JuliaHub.Authentication("https://juliahub.com", "username", *****) +``` """ function authenticate end From 355856ce66ad13e5a92f43896f2228bf4e4d8309 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 24 Mar 2025 19:28:28 +1300 Subject: [PATCH 25/31] changelog --- CHANGELOG.md | 11 +++++++++++ docs/src/reference/projects.md | 4 ++-- src/projects.jl | 4 +++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3792c1ada..9232f4dde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ### Added +* With JuliaHub instances v6.9 and above, JuliaHub.jl now supports project-dataset operations. ([#15], [#82]) + + This includes the following new features: + + - Authentication objects can now be associated with projects. + If the `JULIAHUB_PROJECT_UUID` environment variable is set, JuliaHub.jl will pick it up automatically. + - The `project_dataset` and `project_datasets` functions allow for listing datasets attached to a project. + - `upload_project_dataset` can be used to upload a new version of a dataset. + * All the public API names are now correctly marked `public` in Julia 1.11 and above. ([#83]) ### Changed @@ -134,6 +143,7 @@ Initial package release. 
[#12]: https://github.com/JuliaComputing/JuliaHub.jl/issues/12 [#13]: https://github.com/JuliaComputing/JuliaHub.jl/issues/13 [#14]: https://github.com/JuliaComputing/JuliaHub.jl/issues/14 +[#15]: https://github.com/JuliaComputing/JuliaHub.jl/issues/15 [#18]: https://github.com/JuliaComputing/JuliaHub.jl/issues/18 [#28]: https://github.com/JuliaComputing/JuliaHub.jl/issues/28 [#31]: https://github.com/JuliaComputing/JuliaHub.jl/issues/31 @@ -150,6 +160,7 @@ Initial package release. [#53]: https://github.com/JuliaComputing/JuliaHub.jl/issues/53 [#58]: https://github.com/JuliaComputing/JuliaHub.jl/issues/58 [#74]: https://github.com/JuliaComputing/JuliaHub.jl/issues/74 +[#82]: https://github.com/JuliaComputing/JuliaHub.jl/issues/82 [#83]: https://github.com/JuliaComputing/JuliaHub.jl/issues/83 [#84]: https://github.com/JuliaComputing/JuliaHub.jl/issues/84 [#86]: https://github.com/JuliaComputing/JuliaHub.jl/issues/86 diff --git a/docs/src/reference/projects.md b/docs/src/reference/projects.md index 275a41ba3..29970b5ac 100644 --- a/docs/src/reference/projects.md +++ b/docs/src/reference/projects.md @@ -25,9 +25,9 @@ However, you can opt-out of this behavior by explicitly passing a `project=nothi You can always verify that your operations are running in the context of the correct project by checking the [`Authentication`](@ref) object, e.g. via [`current_authentication`](@ref): -```wip-jldoctest +```jldoctest; setup = :(using JuliaHub; Main.projectauth_setup!()), teardown = :(Main.projectauth_teardown!()) julia> JuliaHub.current_authentication() -... +JuliaHub.Authentication("https://juliahub.com", "username", *****; project_id = "cd6c9ee3-d15f-414f-a762-7e1d3faed835") ``` ## Reference diff --git a/src/projects.jl b/src/projects.jl index fe4eeea6c..800d237e2 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -200,7 +200,9 @@ session (if any), but this can be overridden by passing `project`. !!! 
note "Permissions" Note that in order for this to work, you need to have edit rights on the projects and - the dataset needs to have been marked writable by the dataset owner. + the dataset needs to have been marked writable by the dataset owner. However, unlike for + normal dataset uploads (with [`upload_dataset`](@ref)), you do not need to be the dataset + owner to upload new versions. !!! tip From adfd3f5d05e6e4c7389198a0530eef2d564b7580 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 24 Mar 2025 19:46:25 +1300 Subject: [PATCH 26/31] make JET happy? --- src/projects.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/projects.jl b/src/projects.jl index 800d237e2..973c5f422 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -36,10 +36,11 @@ const ProjectReference = Union{AbstractString, UUIDs.UUID} # if present. function _project_uuid(auth::Authentication, project::Union{ProjectReference, Nothing})::UUIDs.UUID if isnothing(project) - if isnothing(auth.project_id) + project_id = project_id + if isnothing(project_id) throw(ProjectNotSetError()) else - return auth.project_id + return project_id end elseif isa(project, UUIDs.UUID) return project From f6bcd7c69bb3b297b33807012033e7b345e04917 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 24 Mar 2025 20:28:35 +1300 Subject: [PATCH 27/31] :facepalm: --- src/projects.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/projects.jl b/src/projects.jl index 973c5f422..e55a332ae 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -36,7 +36,7 @@ const ProjectReference = Union{AbstractString, UUIDs.UUID} # if present. 
function _project_uuid(auth::Authentication, project::Union{ProjectReference, Nothing})::UUIDs.UUID if isnothing(project) - project_id = project_id + project_id = auth.project_id if isnothing(project_id) throw(ProjectNotSetError()) else From 5aa13faf494c1fd836a66d47b2283c4b35860950 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Thu, 27 Mar 2025 15:18:46 +1300 Subject: [PATCH 28/31] fix project_dataset print --- src/datasets.jl | 2 +- test/projects.jl | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/datasets.jl b/src/datasets.jl index 392e53952..9b98dc9d1 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -225,7 +225,7 @@ function Base.show(io::IO, d::Dataset) if isnothing(d.project) print(io, "JuliaHub.dataset(", dsref, ")") else - print(io, "JuliaHub.project_dataset(", dsref, "; project=", d.project.uuid, ")") + print(io, "JuliaHub.project_dataset(", dsref, "; project=\"", d.project.uuid, "\")") end end diff --git a/test/projects.jl b/test/projects.jl index a64a529fb..4c3985f3a 100644 --- a/test/projects.jl +++ b/test/projects.jl @@ -121,6 +121,27 @@ end @test_throws ArgumentError datasets = JuliaHub.project_datasets("foo") end + + # show() methods on Dataset objects that print as project_dataset()-s + JuliaHub.__AUTH__[] = project_auth_2 + @testset "show methods" begin + datasets = JuliaHub.project_datasets(project_auth_1.project_id) + @test length(datasets) === 3 + let ex = Meta.parse(string(datasets[1])) + @test ex.head == :call + @test ex.args[1] == :(JuliaHub.project_dataset) + + ds = eval(ex) + @test ds isa JuliaHub.Dataset + @test ds == datasets[1] + @test ds != datasets[2] + end + let datasets_eval = eval(Meta.parse(string(datasets))) + @test datasets_eval isa Vector{JuliaHub.Dataset} + @test length(datasets_eval) == length(datasets) + @test datasets_eval == datasets + end + end end end From e0996f931ac9be474a5cfec529cff5bdad532cdc Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Tue, 8 Apr 2025 
19:40:50 +1200 Subject: [PATCH 29/31] Update src/authentication.jl --- src/authentication.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/authentication.jl b/src/authentication.jl index edd811c9b..f0b5ff444 100644 --- a/src/authentication.jl +++ b/src/authentication.jl @@ -215,7 +215,7 @@ This can be set by passing the optional `project` argument, which works as follo - If you pass an explicit UUID (either as a string or an `UUID` object), that will then be used as the project. Note that a UUID passed as a string must be a syntactically correct UUID. -- If you pass `nothing`, that make JuliaHub.jl ignore any values in the `JULIAHUB_PROJECT_UUID` +- Passing `nothing` makes JuliaHub.jl ignore any values in the `JULIAHUB_PROJECT_UUID` environment variable. !!! note "JULIAHUB_PROJECT_UUID" From 9d11d5ad7940fbb51f1a68d1b15405932fb0bd7a Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Tue, 8 Apr 2025 20:36:15 +1200 Subject: [PATCH 30/31] don't allow missing in authenticate() --- src/authentication.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/authentication.jl b/src/authentication.jl index f0b5ff444..c48138b9f 100644 --- a/src/authentication.jl +++ b/src/authentication.jl @@ -267,7 +267,7 @@ function authenticate end function authenticate( server::AbstractString, token::Union{AbstractString, Secret}; - project::Union{AbstractString, UUIDs.UUID, Nothing, Missing}=missing, + project::Union{AbstractString, UUIDs.UUID, Nothing}=_juliahub_project(missing), ) auth = try auth = _authentication( @@ -288,7 +288,7 @@ function authenticate( force::Bool=false, maxcount::Integer=_DEFAULT_authenticate_maxcount, hook::Union{Base.Callable, Nothing}=nothing, - project::Union{AbstractString, UUIDs.UUID, Nothing, Missing}=missing, + project::Union{AbstractString, UUIDs.UUID, Nothing}=_juliahub_project(missing), ) maxcount >= 1 || throw(ArgumentError("maxcount must be >= 1, got '$maxcount'")) if !isnothing(hook) && 
!hasmethod(hook, Tuple{AbstractString}) From 475ab36a7a06a2101b5f5c296ad56427854f42f7 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Tue, 8 Apr 2025 21:02:49 +1200 Subject: [PATCH 31/31] fix doctests --- src/projects.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/projects.jl b/src/projects.jl index e55a332ae..903d06165 100644 --- a/src/projects.jl +++ b/src/projects.jl @@ -150,9 +150,9 @@ JuliaHub.Authentication("https://juliahub.com", "username", *****; project_id = julia> JuliaHub.project_datasets() 3-element Vector{JuliaHub.Dataset}: - JuliaHub.project_dataset(("username", "example-dataset"); project=cd6c9ee3-d15f-414f-a762-7e1d3faed835) - JuliaHub.project_dataset(("anotheruser", "publicdataset"); project=cd6c9ee3-d15f-414f-a762-7e1d3faed835) - JuliaHub.project_dataset(("username", "blobtree/example"); project=cd6c9ee3-d15f-414f-a762-7e1d3faed835) + JuliaHub.project_dataset(("username", "example-dataset"); project="cd6c9ee3-d15f-414f-a762-7e1d3faed835") + JuliaHub.project_dataset(("anotheruser", "publicdataset"); project="cd6c9ee3-d15f-414f-a762-7e1d3faed835") + JuliaHub.project_dataset(("username", "blobtree/example"); project="cd6c9ee3-d15f-414f-a762-7e1d3faed835") ``` """ function project_datasets(