Skip to content

Commit a918874

Browse files
authored
Merge pull request #4 from panubo/feature/load-direct-source
Add support for loading from a fully qualified path, or timestamp
2 parents 32ba064 + 36931a1 commit a918874

File tree

4 files changed

+124
-24
lines changed

4 files changed

+124
-24
lines changed

commands/common.sh

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ get_storage_commands() {
6363
fetch_cmd=( "gsutil" "cp" )
6464
storage_type="gs"
6565
gsutil_auth
66+
find_object="find_object_gs"
6667
;;
6768
s3://*)
6869
echo ">> Storage type: aws s3"
@@ -72,6 +73,7 @@ get_storage_commands() {
7273
ls_cmd=( "aws" "s3" "ls" "${AWS_S3_ADDITIONAL_ARGS}" )
7374
fetch_cmd=( "aws" "s3" "cp" "${AWS_S3_ADDITIONAL_ARGS}" )
7475
storage_type="s3"
76+
find_object="find_object_s3"
7577
;;
7678
file://*|/*|./*)
7779
echo ">> Storage type: file"
@@ -80,6 +82,7 @@ get_storage_commands() {
8082
fetch_cmd=( "cat" )
8183
source="${source#file:\/\/}"
8284
storage_type="file"
85+
find_object="find_object_file"
8386
;;
8487
*)
8588
echoerr "Unknown storage type"
@@ -136,3 +139,108 @@ gsutil_auth() {
136139
"service_account = default" > /etc/boto.cfg
137140
fi
138141
}
142+
143+
# helper functions
144+
#######################################
# Print the filename portion of a full object path.
# Arguments: $1 - object path (or a listing line ending in a filename)
# Outputs:   the text after the last "/" or space of $1 on stdout;
#            $1 is printed unchanged when it contains neither.
#######################################
get_filename_from_object_path() {
  local object_path="${1}"

  # "#" is used as the sed delimiter so the slashes need no escaping.
  echo "${object_path}" | sed -E 's#.*[/ ]([^/]*)$#\1#'
}
148+
149+
#######################################
# Print the bucket / base part of an object URL, always ending in "/".
# Arguments: $1 - object path such as s3://bucket/20230413000003/db.sql.lz4
# Outputs:   "<scheme>://<bucket>/" on stdout for file://, s3:// and gs://
#            URLs; any other input is printed unchanged.
#######################################
get_basename_from_object_path() {
  local object_path="${1}"

  # The "(/.*)?" group makes the part after the bucket optional, so a bare
  # "s3://bucket" is normalised to "s3://bucket/" as well. Callers append
  # "<timestamp>/" directly to this value and rely on the trailing slash.
  echo "${object_path}" | sed -E 's#(file|s3|gs)://([^/]+)(/.*)?$#\1://\2/#'
}
153+
154+
#######################################
# Print the timestamp portion of a full object path.
# Arguments: $1 - object path
# Outputs:   the run of 2-14 digits that directly follows the last "/"
#            having such a run; nothing when no path component starts
#            with at least two digits.
# NOTE(review): a bucket name that starts with two or more digits would
# also match this pattern - confirm whether that can occur.
#######################################
get_timestamp_from_object_path() {
  local object_path="${1}"

  # -n with the "p" flag prints only on a successful substitution; "t"
  # then ends the cycle, otherwise "q" quits at the first non-matching line.
  echo "${object_path}" | sed -n -E 's#.*/([0-9]{2,14}).*#\1#p; t; q;'
}
158+
159+
#######################################
# Check that an object exists by listing it with the storage backend.
# Globals:   ls_cmd (read) - listing command array for the active backend
# Arguments: $1 - full object path
# Outputs:   "Error file not found" via echoerr when the object is missing
# Returns:   0 when the listing produced output, 1 otherwise
#######################################
check_object_exists() {
  local object_path="${1:-}"
  local listing=""

  # An empty path would turn into a bare listing command (e.g. listing all
  # buckets), whose non-empty output would wrongly signal "exists".
  if [[ -z "${object_path}" ]]; then
    echoerr "Error file not found"
    return 1
  fi

  # ls_cmd is run through eval, which re-parses its arguments; the path is
  # shell-quoted with %q so spaces or metacharacters reach the listing
  # command as a single literal argument.
  listing="$(eval "${ls_cmd[@]}" "$(printf '%q' "${object_path}")")" || true

  if [[ -n "${listing}" ]]; then
    return 0
  fi

  echoerr "Error file not found"
  return 1
}
167+
168+
#######################################
# Resolve a gs:// source to the full path of the dump object to load.
# The following are all valid:
#   gs://mybucket/20230413000003/my_database.sql.lz4
#   gs://mybucket/20230413000003/ my_database
#   gs://mybucket/ my_database
#   gs://mybucket/20230413 my_database
# Globals:   ls_cmd (read) - listing command array for the active backend
# Arguments: $1 - source path, $2 - database name (optional)
# Outputs:   the resolved object path on stdout; errors via echoerr
# Exits:     1 when no object, or more than one object, matches
#######################################
find_object_gs() {
  # Locals keep the caller's variables (notably "source") untouched when
  # the function is not run inside a command substitution.
  local source="${1}"
  local database="${2:-}"
  local timestamp base dir
  local full_path=""

  timestamp="$(get_timestamp_from_object_path "${source}")"
  base="$(get_basename_from_object_path "${source}")"
  # Both prefixes get "<timestamp>/" appended below, so make sure they end
  # in exactly one slash even when the source was given without one.
  base="${base%/}/"
  dir="${source%/}/"

  if [[ -z "${timestamp}" ]]; then
    # no timestamp in the path, find the latest
    timestamp="$(eval "${ls_cmd[@]}" "${dir}" | sed -E -e '/[0-9]{14}/!d' -e 's/.*([0-9]{14})\/$/\1/' | sort | tail -n1)" || true
    if [[ -z "${timestamp}" ]]; then
      echoerr "Error file not found"
      exit 1
    fi
    full_path="$(eval "${ls_cmd[@]}" "${dir}${timestamp}/" | grep "/${database}[\.\-]")" || true
  elif [[ $source =~ [0-9]{14}/${database} ]]; then
    # has a full timestamp followed by the database name: should be the
    # complete path
    full_path="${source}"
  elif [[ $source =~ [0-9]{14} ]]; then
    # complete timestamp
    full_path="$(eval "${ls_cmd[@]}" "${source}" | grep "/${database}[\.\-]")" || true
  else
    # partial timestamp. search for matching object path
    full_path="$(eval "${ls_cmd[@]}" "${base}${timestamp}*/" | grep "/${database}[\.\-]")" || true
  fi

  # Nothing matched: fail here instead of asking the backend to list "".
  if [[ -z "${full_path}" ]]; then
    echoerr "Error file not found"
    exit 1
  fi
  # More than one listing line matched: the request is ambiguous.
  if [[ "${full_path}" == *$'\n'* ]]; then
    echoerr "Error too many items found. Timestamp is not distinct."
    exit 1
  fi

  check_object_exists "${full_path}" || { echoerr "Error file not found"; exit 1; }
  echo "${full_path}"
}
201+
202+
203+
#######################################
# Resolve an s3:// source to the full path of the dump object to load.
# The following are all valid:
#   s3://mybucket/20230413000003/my_database.sql.lz4
#   s3://mybucket/20230413000003/ my_database
#   s3://mybucket/ my_database
#   s3://mybucket/20230413 my_database
# Globals:   ls_cmd (read) - listing command array for the active backend
# Arguments: $1 - source path, $2 - database name (optional)
# Outputs:   the resolved object path on stdout; errors via echoerr
# Exits:     1 when no object, or more than one object, matches
#######################################
find_object_s3() {
  # Locals keep the caller's variables (notably "source") untouched when
  # the function is not run inside a command substitution.
  local source="${1}"
  local database="${2:-}"
  local timestamp base file
  local dir=""
  local full_path=""

  timestamp="$(get_timestamp_from_object_path "${source}")"
  base="$(get_basename_from_object_path "${source}")"
  # "<timestamp>/" is appended to base below; guarantee exactly one slash
  # even when the source was given as a bare "s3://bucket".
  base="${base%/}/"

  if [[ -z "${timestamp}" ]]; then
    # no timestamp in the path, find the latest
    timestamp="$(eval "${ls_cmd[@]}" "${base}" | sed -E -e '/[0-9]{14}/!d' -e 's/.*([0-9]{14})\/$/\1/' | sort | tail -n1)" || true
    if [[ -z "${timestamp}" ]]; then
      echoerr "Error file not found"
      exit 1
    fi
    dir="${base}${timestamp}/"
  elif [[ $source =~ [0-9]{14}/${database} ]]; then
    # has a full timestamp followed by the database name: should be the
    # complete path
    full_path="${source}"
  elif [[ $source =~ [0-9]{14} ]]; then
    # complete timestamp; add the trailing slash if the caller omitted it
    # so the listing and the joined path both address the directory
    dir="${source%/}/"
  else
    # partial timestamp. search for the matching directory; anchored so
    # the partial value is a prefix, as in the gs variant's "<partial>*/"
    timestamp="$(eval "${ls_cmd[@]}" "${base}" | sed -E -e '/[0-9]{14}/!d' -e 's/.*([0-9]{14})\/$/\1/' | grep "^${timestamp}")" || true
    if [[ -z "${timestamp}" ]]; then
      echoerr "Error file not found"
      exit 1
    fi
    if [[ "${timestamp}" == *$'\n'* ]]; then
      echoerr "Error too many items found. Timestamp is not distinct."
      exit 1
    fi
    dir="${base}${timestamp}/"
  fi

  if [[ -z "${full_path}" ]]; then
    # Reduce each listing line to its filename, then pick the database's.
    file="$(eval "${ls_cmd[@]}" "${dir}" | sed -E -e 's/.*[\/ ]([^\/]*)$/\1/' | grep "^${database}[\.\-]")" || true
    # Without this guard an empty match would yield the directory itself,
    # which lists successfully and would be returned as the "object".
    if [[ -z "${file}" ]]; then
      echoerr "Error file not found"
      exit 1
    fi
    if [[ "${file}" == *$'\n'* ]]; then
      echoerr "Error too many items found. Timestamp is not distinct."
      exit 1
    fi
    full_path="${dir}${file}"
  fi

  check_object_exists "${full_path}" || { echoerr "Error file not found"; exit 1; }
  echo "${full_path}"
}
242+
243+
#######################################
# Placeholder object finder for the "file" storage type.
# Outputs: "find_object_file not implemented" via echoerr
# Exits:   1, always
#######################################
find_object_file() {
  echoerr "find_object_file not implemented"
  exit 1
}

commands/load

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -208,30 +208,24 @@ case "${#args[@]}" in
208208
;;
209209
esac
210210

211-
echo "Source: ${source}"
212-
echo "SRC DB: ${src_database}"
213-
echo "DEST DB: ${dest_database}"
211+
echo "Source: ${source}"
212+
echo "Source Database: ${src_database}"
213+
echo "Destination Database: ${dest_database}"
214214

215215
# Set the umask, umask defaults to 0077 to keep files private during db dumping
216216
umask "${umask:-0077}"
217217

218-
date="$(date --utc "+${date_format:-%Y%m%d%H%M%S}")"
219-
220218
get_storage_commands "${source}"
221-
222-
# Find latest
223-
latest="$(eval "${ls_cmd[@]}" "${source}" | sed -E -e '/[0-9]{14}/!d' -e 's/.*([0-9]{14})\/$/\1/' | sort | tail -n1)"
224-
echo "Latest: ${latest}"
225-
226-
file="$(eval "${ls_cmd[@]}" "${source}/${latest}/" | sed -E -e 's/.*[\/ ]([^\/]*)$/\1/' | grep "^${src_database}[\.\-]" || true)"
227-
if [[ -z "${file}" ]]; then
228-
echo "No save found for database in ${latest}"
219+
file_path="$(eval ${find_object} "${source}" "${src_database}")"
220+
if [[ -z "${file_path}" ]]; then
221+
echo "No save found for database ${src_database} in ${source}"
229222
exit 1
223+
else
224+
echo "Filepath: ${file_path}"
230225
fi
231-
echo "File: ${file}"
232226

233227
if [[ "${compression:-auto}" == "auto" ]]; then
234-
case "${file##*.}" in
228+
case "${file_path##*.}" in
235229
"lz4")
236230
compression="lz4"
237231
;;
@@ -268,9 +262,9 @@ wait_mariadb "${host}" "${port:-3306}"
268262
/usr/bin/mysql "${connection[@]}" -rs -e "DROP DATABASE IF EXISTS ${dest_database}; CREATE DATABASE ${dest_database}"
269263

270264
if [[ "${#sed_cmd[@]}" -gt 0 ]]; then
271-
eval "${fetch_cmd[@]}" "${source}/${latest}/${file}" "-" | "${decompression_cmd[@]}" | sed -E "${sed_cmd[@]}" | /usr/bin/mysql "${connection_no_db[@]}" "${dest_database}"
265+
eval "${fetch_cmd[@]}" "${file_path}" "-" | "${decompression_cmd[@]}" | sed -E "${sed_cmd[@]}" | /usr/bin/mysql "${connection_no_db[@]}" "${dest_database}"
272266
else
273-
eval "${fetch_cmd[@]}" "${source}/${latest}/${file}" "-" | "${decompression_cmd[@]}" | /usr/bin/mysql "${connection_no_db[@]}" "${dest_database}"
267+
eval "${fetch_cmd[@]}" "${file_path}" "-" | "${decompression_cmd[@]}" | /usr/bin/mysql "${connection_no_db[@]}" "${dest_database}"
274268
fi
275269

276270
echo ">> Finished."

commands/load.md

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,5 @@ Command to load a sql dump from object storage (or filesystem) to a destination
88
* DROP and CREATE destination database
99
* Support for sed filters
1010
* gsutil auth helper
11-
12-
## Limitations
13-
14-
* Source must be a directory named with the date ie 20200813000000 (must be 14 chars)
15-
* Only the latest is loadable
11+
* Source can be a bucket root with timestamped directories named with the date, i.e. 20200813000000 (must be 14 chars),
12+
or a path to the dump to restore, or a date-stamped path (or partial date stamp), e.g. `s3://mybucket`, or `s3://mybucket/20200813000000`, or `s3://mybucket/20200813000000/my_database.sql.lz4`

tests/test.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,9 @@ docker run -d --name mariadb -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password ${MARI
6565
docker run -d --name minio -p 9000:9000 ${MINIO_IMAGE}:${MINIO_TAG} server /data > /dev/null
6666
docker run --rm -i --link minio -e MC_HOST_minio=http://minioadmin:minioadmin@minio:9000 minio/mc:latest --quiet mb minio/backup
6767
docker run -i --name ${TEST_NAME}-save --link mariadb --link minio -e AWS_ACCESS_KEY_ID=minioadmin -e AWS_SECRET_ACCESS_KEY=minioadmin -e AWS_S3_ADDITIONAL_ARGS="--endpoint-url http://minio:9000" $TEST_CONTAINER save --host mariadb --password password s3://backup
68-
docker run -i --name ${TEST_NAME}-load --link mariadb --link minio -e AWS_ACCESS_KEY_ID=minioadmin -e AWS_SECRET_ACCESS_KEY=minioadmin -e AWS_S3_ADDITIONAL_ARGS="--endpoint-url http://minio:9000" $TEST_CONTAINER load --host mariadb --password password s3://backup/ mysql newdb
69-
cleanup mariadb minio ${TEST_NAME}-save ${TEST_NAME}-load
68+
docker run -i --name ${TEST_NAME}-load1 --link mariadb --link minio -e AWS_ACCESS_KEY_ID=minioadmin -e AWS_SECRET_ACCESS_KEY=minioadmin -e AWS_S3_ADDITIONAL_ARGS="--endpoint-url http://minio:9000" $TEST_CONTAINER load --host mariadb --password password s3://backup/ mysql newdb
69+
docker run -i --name ${TEST_NAME}-load2 --link mariadb --link minio -e AWS_ACCESS_KEY_ID=minioadmin -e AWS_SECRET_ACCESS_KEY=minioadmin -e AWS_S3_ADDITIONAL_ARGS="--endpoint-url http://minio:9000" $TEST_CONTAINER load --host mariadb --password password s3://backup/202 mysql newdb
70+
cleanup mariadb minio ${TEST_NAME}-save ${TEST_NAME}-load1 ${TEST_NAME}-load2
7071

7172
echo "=> Test mysql command"
7273
docker run -d --name mariadb -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password ${MARIADB_IMAGE}:${MARIADB_TAG} > /dev/null

0 commit comments

Comments
 (0)