Skip to content

Commit b7a04a9

Browse files
committed
Initial version
1 parent 0bc2113 commit b7a04a9

12 files changed

+352
-0
lines changed

NAMESPACE

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(fetch_all_courses)
4+
export(fetch_department_courses)
5+
export(fetch_departments)
6+
export(parse_courses)
7+
export(read_cache)
8+
export(write_cache)

R/cache.R

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#' Write course data to cache
#'
#' Serialises `data` as pretty-printed JSON to `<cache_dir>/<dept>.json`,
#' creating `cache_dir` first.
#'
#' @param data Course data frame
#' @param cache_dir Directory to cache results
#' @param dept Department code; used as the file name (without extension)
#' @export
write_cache <- function(data, cache_dir, dept) {
  target <- fs::path(cache_dir, paste0(dept, ".json"))
  fs::dir_create(cache_dir)
  jsonlite::write_json(data, target, pretty = TRUE)
}
15+
16+
#' Read course data from cache
#'
#' With `dept = NULL`, every `*.json` file in `cache_dir` is read and the
#' results are row-bound, except `departments.json`: that file is the
#' department index written by `fetch_departments()` and does not hold
#' course data. With a department code, only `<cache_dir>/<dept>.json` is
#' read.
#'
#' @param cache_dir Directory containing cached files
#' @param dept Department code (optional)
#' @return Data frame of course data
#' @export
read_cache <- function(cache_dir, dept = NULL) {
  if (is.null(dept)) {
    files <- fs::dir_ls(cache_dir, glob = "*.json")
    # fetch_departments() caches its index as departments.json in the same
    # directory; binding it here would mix department rows into the courses.
    files <- files[basename(files) != "departments.json"]
    return(purrr::map_dfr(files, jsonlite::read_json, simplifyVector = TRUE))
  }

  file <- fs::path(cache_dir, paste0(dept, ".json"))
  if (!fs::file_exists(file)) {
    stop("Cache file not found for department: ", dept, call. = FALSE)
  }
  jsonlite::read_json(file, simplifyVector = TRUE)
}

R/constants.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Base URL of the Stanford ExploreCourses site.
ENDPOINT <- "https://explorecourses.stanford.edu/"

# XML listing requested by fetch_departments().
DEPARTMENTS_ENDPOINT <- paste0(ENDPOINT, "?view=xml-20140630")

# glue template for a department's course search; `{name}` is filled in
# with the department code by fetch_department_courses().
COURSE_ENDPOINT <- paste(
  c(
    ENDPOINT,
    "search?view=xml-20140630&academicYear=&q={name}&",
    "filter-departmentcode-{name}=on&filter-coursestatus-Active=on"
  ),
  collapse = ""
)

R/explorecourses-package.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#' Stanford
2+
#'
3+
#' Tools for fetching, parsing, and caching course data from Stanford
#' University's ExploreCourses web application.
5+
#'
6+
#' @keywords internal
7+
"_PACKAGE"
8+
9+
## usethis namespace: start
10+
## usethis namespace: end
11+
NULL

R/fetch.R

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
#' Fetch department list from Stanford ExploreCourses
#'
#' Requests `DEPARTMENTS_ENDPOINT`, parses the XML response and builds one
#' row per `department` element, labelled with the `name` attribute of its
#' enclosing `school` element. When `cache_dir` is supplied the table is
#' also written to `<cache_dir>/departments.json`.
#'
#' @param cache_dir Directory to cache results
#' @return A data frame containing department information, with columns
#'   `name`, `longname` and `school`
#' @export
#' @include constants.R
fetch_departments <- function(cache_dir = NULL) {
  response <- httr2::req_perform(httr2::request(DEPARTMENTS_ENDPOINT))
  doc <- xml2::read_xml(httr2::resp_body_string(response))

  # Rows for every department listed under a single school node.
  school_rows <- function(school) {
    school_name <- xml2::xml_attr(school, "name")
    purrr::map_dfr(
      xml2::xml_find_all(school, ".//department"),
      function(dep) {
        tibble::tibble(
          name = xml2::xml_attr(dep, "name"),
          longname = xml2::xml_attr(dep, "longname"),
          school = school_name
        )
      }
    )
  }

  departments <- purrr::map_dfr(
    xml2::xml_find_all(doc, "//school"),
    school_rows
  )

  if (!is.null(cache_dir)) {
    fs::dir_create(cache_dir)
    out_path <- fs::path(cache_dir, "departments.json")
    jsonlite::write_json(departments, out_path, pretty = TRUE)
  }

  departments
}
41+
42+
#' Fetch courses for a specific department
#'
#' Fills the `COURSE_ENDPOINT` template with the department code, performs
#' the request and returns the response body as a string. When `cache_dir`
#' is supplied the raw body is also saved to `<cache_dir>/<name>.xml`.
#'
#' @param name Department code
#' @param cache_dir Directory to cache results
#' @return XML content of courses
#' @export
fetch_department_courses <- function(name, cache_dir = NULL) {
  # Percent-encode the code before interpolating it into the query string:
  # a reserved character such as the "&" in "MS&E" would otherwise be read
  # as a parameter separator and the request would search for the wrong
  # department.
  url <- glue::glue(COURSE_ENDPOINT, name = xml2::url_escape(name))

  resp <- httr2::request(url) |>
    httr2::req_perform()

  content <- httr2::resp_body_string(resp)

  if (!is.null(cache_dir)) {
    fs::dir_create(cache_dir)

    # The cache file keeps the unescaped department code as its name.
    xml_path <- fs::path(cache_dir, paste0(name, ".xml"))
    readr::write_file(content, xml_path)
  }

  content
}
65+
66+
#' Parse course XML into a data frame
#'
#' Every `course` element yields its basic fields. Those are joined to one
#' row per `section`, and each section is in turn joined to one row per
#' `schedule`, so a course contributes one row per section/schedule
#' combination. A course without sections contributes a single row holding
#' only the basic fields.
#'
#' @param xml_content XML content from fetch_department_courses
#' @return A single data frame of course information, with the rows of all
#'   courses bound together
#' @export
parse_courses <- function(xml_content) {
  # Text of the first descendant of `node` matching `xpath`.
  first_text <- function(node, xpath) {
    xml2::xml_text(xml2::xml_find_first(node, xpath))
  }

  # Same lookup, converted to numeric.
  first_number <- function(node, xpath) {
    as.numeric(first_text(node, xpath))
  }

  # One row describing a single schedule, keyed by its section's class id.
  schedule_row <- function(schedule, class_id) {
    tibble::tibble(
      section_id = class_id,
      days = first_text(schedule, ".//days"),
      start_time = first_text(schedule, ".//startTime"),
      end_time = first_text(schedule, ".//endTime"),
      location = first_text(schedule, ".//location")
    )
  }

  # Rows for one section: the section fields, repeated per schedule.
  section_rows <- function(section, course_id) {
    section_info <- tibble::tibble(
      objectID = course_id,
      term = first_text(section, ".//term"),
      term_id = first_text(section, ".//termId"),
      section_number = first_text(section, ".//sectionNumber"),
      component = first_text(section, ".//component"),
      class_id = first_text(section, ".//classId"),
      current_size = first_number(section, ".//currentClassSize"),
      max_size = first_number(section, ".//maxClassSize")
    )

    schedule_data <- purrr::map_dfr(
      xml2::xml_find_all(section, ".//schedule"),
      function(schedule) schedule_row(schedule, section_info$class_id)
    )

    # A section without schedules is returned without schedule columns.
    if (nrow(schedule_data) == 0) {
      return(section_info)
    }

    dplyr::left_join(
      section_info,
      schedule_data,
      by = c("class_id" = "section_id")
    )
  }

  # Rows for one course: the basic fields, repeated per section row.
  course_rows <- function(course) {
    basic_info <- tibble::tibble(
      objectID = first_text(course, ".//courseId"),
      year = first_text(course, ".//year"),
      subject = first_text(course, ".//subject"),
      code = first_text(course, ".//code"),
      title = first_text(course, ".//title"),
      description = first_text(course, ".//description"),
      units_min = first_number(course, ".//unitsMin"),
      units_max = first_number(course, ".//unitsMax")
    )

    section_data <- purrr::map_dfr(
      xml2::xml_find_all(course, ".//section"),
      function(section) section_rows(section, basic_info$objectID)
    )

    # A course without sections is returned with the basic fields only.
    if (nrow(section_data) == 0) {
      return(basic_info)
    }

    dplyr::left_join(basic_info, section_data, by = "objectID")
  }

  doc <- xml2::read_xml(xml_content)
  purrr::map_dfr(xml2::xml_find_all(doc, "//course"), course_rows)
}
140+
141+
#' Fetch and process courses for multiple departments
#'
#' For each department code this announces the department with a message,
#' downloads its course XML, parses it and tags the parsed rows with the
#' department code. When `departments` is `NULL` the codes are taken from
#' the `name` column returned by `fetch_departments()`.
#'
#' @param departments Character vector of department codes
#' @param cache_dir Directory to cache results
#' @return A single data frame with the rows of every department bound
#'   together, plus a `department` column
#' @export
fetch_all_courses <- function(departments = NULL, cache_dir = NULL) {
  dept_codes <- departments
  if (is.null(dept_codes)) {
    dept_codes <- fetch_departments(cache_dir)$name
  }

  # Download, parse and label the courses of a single department.
  fetch_one <- function(dept) {
    message("Fetching department: ", dept)
    parsed <- parse_courses(fetch_department_courses(dept, cache_dir))
    parsed$department <- dept
    parsed
  }

  purrr::map_dfr(dept_codes, fetch_one)
}

man/explorecourses-package.Rd

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/fetch_all_courses.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/fetch_department_courses.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/fetch_departments.Rd

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/parse_courses.Rd

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)