#!/usr/bin/env bash

# Parse command line arguments.
#
# Globals written:
#   FORCE_REFRESH  "true" when --force-refresh/-f was given, else "false"
#   QUIET_MODE     "true" when --quiet/-q was given, else "false"
# Arguments: the script's command line ("$@").
# Exits 0 after printing usage for --help/-h, and exits 1 (with the
# diagnostic on stderr) for any unrecognised argument.
parse_args() {
    FORCE_REFRESH=false
    QUIET_MODE=false
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --force-refresh | -f)
                FORCE_REFRESH=true
                shift
                ;;
            --quiet | -q)
                QUIET_MODE=true
                shift
                ;;
            --help | -h)
                echo "Usage: $0 [--force-refresh|-f] [--quiet|-q] [--help|-h]"
                echo "  --force-refresh, -f  Force refresh of cached data"
                echo "  --quiet, -q          Suppress progress and informational output"
                echo "  --help, -h           Show this help message"
                exit 0
                ;;
            *)
                # Diagnostics belong on stderr so they are not mistaken for
                # regular output.
                echo "Unknown option: $1" >&2
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"
30+
# Ensure that the common script exists and is readable, then verify it has no
# syntax errors and defines the required function.
# common.sh is looked up in the same directory as this script.
common_script="$(dirname "$0")/common.sh"
@@ -12,6 +40,22 @@ set_colors
1240
1341check_github_actions
1442
# Override progress function if in quiet mode.
#
# When QUIET_MODE is "true", redefine progress() as a no-op so later calls
# print nothing; otherwise leave any existing definition untouched.
# Globals read: QUIET_MODE (treated as "false" when unset).
apply_quiet_mode() {
    [ "${QUIET_MODE:-false}" = true ] || return 0
    progress() {
        # Do nothing in quiet mode
        :
    }
}
apply_quiet_mode
50+
# Cache configuration
CACHE_DIR="$HOME/.cache/lab0-c"          # per-user cache directory
CACHE_FILE="$CACHE_DIR/upstream_commit"  # holds the last fetched upstream hash
CACHE_EXPIRY=900                         # Cache for 15 minutes (in seconds)

# Create cache directory if it doesn't exist
mkdir -p "$CACHE_DIR"
58+
# Step counters passed to progress(): total number of steps and the index of
# the step currently running (incremented before each step).
TOTAL_STEPS=6
CURRENT_STEP=0
1761
((CURRENT_STEP++))
progress "$CURRENT_STEP" "$TOTAL_STEPS"

# Marker file whose modification time records the most recent rate-limit hit.
# It must be defined before the cache check because the force-refresh branch
# below removes it.
RATE_LIMIT_FILE="$CACHE_DIR/rate_limited"

# Print the age of file $1 in whole seconds (now minus its mtime).
# GNU stat (-c %Y) is tried before BSD stat (-f %m): under GNU stat, -f means
# "file system status", which writes unrelated text to stdout before failing
# on the "%m" operand and would corrupt the captured value.
# If neither form works the mtime is taken as 0, so the file looks very old.
file_age_seconds() {
    local mtime
    mtime=$(stat -c %Y "$1" 2>/dev/null) \
        || mtime=$(stat -f %m "$1" 2>/dev/null) \
        || mtime=0
    echo $(($(date +%s) - mtime))
}

# Print a millisecond count $1 as seconds with three decimals for 'sleep',
# e.g. 5 -> 0.005 and 2345 -> 2.345. Uses printf for portability (bc might
# not be installed).
ms_to_seconds() {
    printf '%d.%03d' "$(($1 / 1000))" "$(($1 % 1000))"
}

# Check if cache exists and is still valid
use_cache=false
if [ "$FORCE_REFRESH" = true ]; then
    if [ "$QUIET_MODE" = false ]; then
        printf "\r%80s\r" " "
        echo "Force refresh requested. Clearing cache..."
    fi
    rm -f "$CACHE_FILE" "$RATE_LIMIT_FILE"
elif [ -f "$CACHE_FILE" ]; then
    cache_age=$(file_age_seconds "$CACHE_FILE")
    if [ "$cache_age" -lt "$CACHE_EXPIRY" ]; then
        upstream_hash=$(cat "$CACHE_FILE")
        # An empty cache file is ignored; the hash is then fetched again.
        if [ -n "$upstream_hash" ]; then
            use_cache=true
            if [ "$QUIET_MODE" = false ]; then
                printf "\r%80s\r" " "
                echo "Using cached upstream commit (${cache_age}s old, expires in $((CACHE_EXPIRY - cache_age))s)"
            fi
        fi
    else
        if [ "$QUIET_MODE" = false ]; then
            printf "\r%80s\r" " "
            echo "Cache expired (${cache_age}s old). Refreshing..."
        fi
    fi
fi

# Only sleep and fetch if not using cache
if [ "$use_cache" = false ]; then
    # Generate a random integer in [0..999].
    random_ms=$((RANDOM % 1000))

    # Back off longer if we've been rate limited recently
    if [ -f "$RATE_LIMIT_FILE" ]; then
        last_limited=$(file_age_seconds "$RATE_LIMIT_FILE")
        if [ "$last_limited" -lt 300 ]; then # If rate limited in last 5 minutes
            random_ms=$((random_ms + 2000))  # Add 2 seconds
            if [ "$QUIET_MODE" = false ]; then
                printf "\r%80s\r" " "
                echo "Rate limit detected. Adding delay..."
            fi
        fi
    fi

    # Convert milliseconds to the S.mmm form that 'sleep' interprets as
    # seconds; this covers both sub-second and multi-second delays.
    sleep_time=$(ms_to_seconds "$random_ms")

    sleep "$sleep_time"
fi
57152
58153# 2. Fetch latest commit from GitHub
59154(( CURRENT_STEP++ ))
@@ -62,53 +157,95 @@ progress "$CURRENT_STEP" "$TOTAL_STEPS"
# Owner of the GitHub repository that 'origin' points at, taken from the
# remote URL (works for both https:// and git@ style URLs).
REPO_OWNER=$(git config -l | grep -w remote.origin.url | sed -E 's%^.*github.com[/:]([^/]+)/lab0-c.*%\1%')
REPO_NAME="lab0-c"

# Only fetch from network if not using cache
if [ "$use_cache" = false ]; then
    # First try using git ls-remote (much faster and less likely to be rate limited)
    if [ "$QUIET_MODE" = false ]; then
        printf "\r%80s\r" " "
        echo "Checking upstream repository..."
    fi
    # Ask for the full ref name: a bare "master" pattern matches the tail of
    # any ref (e.g. refs/heads/foo/master), which could yield several lines.
    upstream_hash=$(git ls-remote origin refs/heads/master 2>/dev/null | head -n 1 | cut -f1)

    # If git ls-remote fails or returns empty, fall back to web scraping
    if [ -z "$upstream_hash" ]; then
        if [ "$QUIET_MODE" = false ]; then
            printf "\r%80s\r" " "
            echo "git ls-remote failed. Falling back to web scraping..."
        fi

        # Add User-Agent header to avoid being blocked
        USER_AGENT="Mozilla/5.0 (compatible; lab0-c-checker/1.0)"

        # Try with rate limit detection: curl appends the HTTP status code on
        # its own final line, which is then split off from the page body.
        repo_html=$(curl -s -w "\n%{http_code}" -H "User-Agent: $USER_AGENT" "https://github.com/${REPO_OWNER}/${REPO_NAME}")
        http_code=${repo_html##*$'\n'}
        repo_html=${repo_html%$'\n'*}

        # Check for rate limiting (HTTP 429 or 403)
        if [ "$http_code" = "429" ] || [ "$http_code" = "403" ]; then
            # Record the hit so that later runs add an extra delay.
            touch "${RATE_LIMIT_FILE:-$CACHE_DIR/rate_limited}"
            if [ "$QUIET_MODE" = false ]; then
                printf "\r%80s\r" " "
                echo "GitHub rate limit detected (HTTP $http_code). Using fallback..."
            fi

            # Retry git ls-remote once more as the only remaining fallback
            upstream_hash=$(git ls-remote origin refs/heads/master 2>/dev/null | head -n 1 | cut -f1)
            if [ -z "$upstream_hash" ]; then
                throw "Rate limited by GitHub and no fallback available. Please try again later."
            fi
        else
            # Extract the default branch name from the link to the LICENSE blob
            DEFAULT_BRANCH=$(echo "$repo_html" | sed -nE "s#.*${REPO_OWNER}/${REPO_NAME}/blob/([^/]+)/LICENSE.*#\1#p" | head -n 1)

            if [ "$DEFAULT_BRANCH" != "master" ]; then
                echo "$DEFAULT_BRANCH"
                throw "The default branch for $REPO_OWNER/$REPO_NAME is not 'master'."
            fi

            # Construct the URL to the commits page for the default branch
            COMMITS_URL="https://github.com/${REPO_OWNER}/${REPO_NAME}/commits/${DEFAULT_BRANCH}"

            temp_file=$(mktemp)
            curl -sSL -H "User-Agent: $USER_AGENT" -o "$temp_file" "$COMMITS_URL"

            # general pattern that finds commit links; keep the first match
            upstream_hash=$(
                sed -nE 's/.*href="[^"]*\/commit\/([0-9a-f]{40}).*/\1/p' "$temp_file" | head -n 1
            )

            rm -f "$temp_file"

            # If HTML parsing fails, fallback to using GitHub REST API
            if [ -z "$upstream_hash" ]; then
                API_URL="https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/commits"

                # Start from a known-empty value so the emptiness check below
                # only reflects the authenticated request made here.
                response=""

                # Try to use cached GitHub credentials from GitHub CLI
                # https://docs.github.com/en/get-started/git-basics/caching-your-github-credentials-in-git
                if command -v gh >/dev/null 2>&1; then
                    TOKEN=$(gh auth token 2>/dev/null)
                    if [ -n "$TOKEN" ]; then
                        response=$(curl -sSL -H "Authorization: token $TOKEN" -H "User-Agent: $USER_AGENT" "$API_URL")
                    fi
                fi

                # If response is empty (i.e. token not available or failed), use unauthenticated request.
                if [ -z "$response" ]; then
                    response=$(curl -sSL -H "User-Agent: $USER_AGENT" "$API_URL")
                fi

                # Extract the latest commit SHA from the JSON response
                upstream_hash=$(echo "$response" | grep -m 1 '"sha":' | sed -E 's/.*"sha": "([^"]+)".*/\1/')
            fi
        fi
    fi

    if [ -z "$upstream_hash" ]; then
        throw "Failed to retrieve upstream commit hash from GitHub.\n"
    fi

    # Cache the result
    printf '%s\n' "$upstream_hash" >"$CACHE_FILE"
fi
113250
114251# 3. Check local repository awareness
@@ -167,6 +304,9 @@ if [ $failed -ne 0 ]; then
167304 exit 1
168305fi
169306
# Print the fingerprint unless quiet mode is on; the printf first blanks the
# current terminal line (80 columns) before the message is written.
if [ "$QUIET_MODE" = false ]; then
    printf "\r%80s\r" " "
    echo "Fingerprint: $(make_random_string 24 "$REPO_OWNER")"
fi
171311
172312exit 0
0 commit comments