Skip to content

Commit 37ccaac

Browse files
committed
Add Flink SQL module and data generators for airlines and airports
1 parent 7dc1250 commit 37ccaac

File tree

27 files changed

+1867
-9
lines changed

27 files changed

+1867
-9
lines changed

Makefile

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,15 @@ help:
4040
@echo "${BLUE}${INFO} 🚀 run-sql${RESET} - Run the Flink Table API application"
4141
@echo "${BLUE}${INFO} 🧪 ci-checks${RESET} - Run CI checks locally"
4242
@echo ""
43-
@echo "${YELLOW}${STAR} Data Generator:${RESET}"
43+
@echo "${YELLOW}${STAR} Data Generators:${RESET}"
44+
@echo "${BLUE}${INFO} 🏗️ build-data-generator${RESET} - Build the Flink Data Generator module"
45+
@echo "${BLUE}${INFO} 🏗️ build-ref-generator${RESET} - Build the Reference Data Generator module"
4446
@echo "${BLUE}${INFO} 🚀 run-data-generator-local${RESET} - Run the Flink Data Generator in local environment"
4547
@echo "${BLUE}${INFO} ☁️ run-data-generator-cloud${RESET} - Run the Flink Data Generator in cloud environment"
4648
@echo "${BLUE}${INFO} 🚀 run-data-generator-with-props${RESET} - Run with custom properties (PROPS=path/to/properties)"
49+
@echo "${BLUE}${INFO} 🚀 run-ref-generator-local${RESET} - Run the Reference Data Generator in local environment"
50+
@echo "${BLUE}${INFO} ☁️ run-ref-generator-cloud${RESET} - Run the Reference Data Generator in cloud environment"
51+
@echo "${BLUE}${INFO} 🚀 run-ref-generator-with-props${RESET} - Run the Reference Data Generator with custom properties (PROPS=path/to/properties)"
4752
@echo ""
4853
@echo "${YELLOW}${STAR} Flink Table API:${RESET}"
4954
@echo "${BLUE}${INFO} 🚀 run-sql-status-local${RESET} - Run Flight Status Dashboard locally"
@@ -55,6 +60,11 @@ help:
5560
@echo "${BLUE}${INFO} ☁️ run-sql-delays-cloud${RESET} - Run Airline Delay Analytics on cloud"
5661
@echo "${BLUE}${INFO} ☁️ run-sql-all-cloud${RESET} - Run all SQL use cases on cloud"
5762
@echo ""
63+
@echo "${YELLOW}${STAR} Flink SQL:${RESET}"
64+
@echo "${BLUE}${INFO} 🚀 flink-sql-client${RESET} - Start interactive Flink SQL client"
65+
@echo "${BLUE}${INFO} 🚀 flink-sql-execute${RESET} - Execute a specific SQL file (SQL_FILE=path/to/file.sql)"
66+
@echo "${BLUE}${INFO} 🚀 flink-sql-build${RESET} - Build Flink SQL client image"
67+
@echo ""
5868
@echo "${YELLOW}${STAR} Docker Management:${RESET}"
5969
@echo "${BLUE}${INFO} 🐳 docker-up${RESET} - Start all containers"
6070
@echo "${BLUE}${INFO} 🐳 docker-down${RESET} - Stop and remove all containers"
@@ -297,7 +307,7 @@ terraform-org-id:
297307
exit 1; \
298308
fi
299309
@echo "${BLUE}${INFO} Exporting organization ID to TF_VAR_org_id...${RESET}"
300-
@export TF_VAR_org_id=$$(confluent organization list -o json | jq -c -r '.[] | select(.is_current)' | jq '.id'); \
310+
@export TF_VAR_org_id=$$(confluent organization list -o json | jq -c -r '.[] | select(.is_current)' | jq -r '.id'); \
301311
echo "TF_VAR_org_id=$$TF_VAR_org_id"; \
302312
echo "export TF_VAR_org_id=$$TF_VAR_org_id" >> .env; \
303313
echo "${GREEN}${CHECK} Organization ID exported to TF_VAR_org_id and saved to .env file!${RESET}"
@@ -460,7 +470,8 @@ docker-logs:
460470

461471
docker-restart:
462472
@echo "${BLUE}${ROCKET} Restarting Docker containers...${RESET}"
463-
docker compose restart
473+
docker compose down
474+
docker compose up -d
464475
@echo "${GREEN}${CHECK} Docker containers restarted successfully!${RESET}"
465476

466477
# Run CI checks locally
@@ -492,26 +503,45 @@ docker-build:
492503
@echo "${GREEN}${CHECK} Docker image built successfully!${RESET}"
493504
@echo "${YELLOW}${INFO} Run with: docker run -it flink-for-java-workshop:local${RESET}"
494505

506+
# Flink SQL related targets
507+
.PHONY: flink-sql-client flink-sql-execute flink-sql-build
508+
509+
flink-sql-build:
510+
@echo "${BLUE}${ROCKET} Building Flink SQL client image...${RESET}"
511+
docker compose build jobmanager
512+
docker compose build taskmanager
513+
514+
flink-sql-client:
515+
@echo "${BLUE}${ROCKET} Starting interactive Flink SQL client...${RESET}"
516+
docker compose run --rm sql-client
517+
518+
flink-sql-execute:
519+
@echo "${BLUE}${ROCKET} Executing SQL file...${RESET}"
520+
@if [ -z "$(SQL_FILE)" ]; then \
521+
echo "${RED}${ERROR} SQL_FILE parameter is required${RESET}"; \
522+
echo "Example: make flink-sql-execute SQL_FILE=usecases/airline_delays.sql"; \
523+
exit 1; \
524+
fi
525+
docker compose run --rm -e SQL_FILE=$(SQL_FILE) sql-client
526+
495527
# Flink Data Generator targets
496-
.PHONY: build-data-generator
528+
.PHONY: build-data-generator run-data-generator-local run-data-generator-cloud run-data-generator-with-props
529+
497530
build-data-generator:
498531
@echo "${BLUE}${ROCKET} Building Flink Data Generator...${RESET}"
499532
./gradlew :flink-data-generator:build
500533
@echo "${GREEN}${CHECK} Flink Data Generator built successfully!${RESET}"
501534

502-
.PHONY: run-data-generator-local
503535
run-data-generator-local:
504536
@echo "${BLUE}${ROCKET} Running Flink Data Generator in local environment...${RESET}"
505537
./gradlew :flink-data-generator:run --args="--env local"
506538
@echo "${GREEN}${CHECK} Flink Data Generator completed!${RESET}"
507539

508-
.PHONY: run-data-generator-cloud
509540
run-data-generator-cloud:
510541
@echo "${BLUE}${CLOUD} Running Flink Data Generator in cloud environment...${RESET}"
511542
./gradlew :flink-data-generator:run --args="--env cloud"
512543
@echo "${GREEN}${CHECK} Flink Data Generator completed!${RESET}"
513544

514-
.PHONY: run-data-generator-with-props
515545
run-data-generator-with-props:
516546
@echo "${BLUE}${ROCKET} Running Flink Data Generator with custom properties...${RESET}"
517547
@if [ -z "$(PROPS)" ]; then \
@@ -521,6 +551,33 @@ run-data-generator-with-props:
521551
./gradlew :flink-data-generator:run --args="--properties $(PROPS)"
522552
@echo "${GREEN}${CHECK} Flink Data Generator completed!${RESET}"
523553

554+
# Reference Data Generator targets
555+
.PHONY: build-ref-generator run-ref-generator-local run-ref-generator-cloud run-ref-generator-with-props
556+
557+
build-ref-generator:
558+
@echo "${BLUE}${ROCKET} Building Reference Data Generator...${RESET}"
559+
./gradlew :data-generator:build
560+
@echo "${GREEN}${CHECK} Reference Data Generator built successfully!${RESET}"
561+
562+
run-ref-generator-local:
563+
@echo "${BLUE}${ROCKET} Running Reference Data Generator in local environment...${RESET}"
564+
./gradlew :data-generator:run --args="--env local"
565+
@echo "${GREEN}${CHECK} Reference Data Generator completed!${RESET}"
566+
567+
run-ref-generator-cloud:
568+
@echo "${BLUE}${CLOUD} Running Reference Data Generator in cloud environment...${RESET}"
569+
./gradlew :data-generator:run --args="--env cloud"
570+
@echo "${GREEN}${CHECK} Reference Data Generator completed!${RESET}"
571+
572+
run-ref-generator-with-props:
573+
@echo "${BLUE}${ROCKET} Running Reference Data Generator with custom properties...${RESET}"
574+
@if [ -z "$(PROPS)" ]; then \
575+
echo "${RED}${ERROR} Please specify properties file with PROPS=path/to/properties.${RESET}"; \
576+
exit 1; \
577+
fi
578+
./gradlew :data-generator:run --args="--properties $(PROPS)"
579+
@echo "${GREEN}${CHECK} Reference Data Generator completed!${RESET}"
580+
524581
# Clean up
525582
.PHONY: clean
526583
clean:
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"type": "record",
3+
"name": "Airline",
4+
"namespace": "io.confluent.developer.models.reference",
5+
"doc": "Schema for airline reference data",
6+
"fields": [
7+
{
8+
"name": "airlineCode",
9+
"type": "string",
10+
"doc": "Unique identifier for the airline"
11+
},
12+
{
13+
"name": "airlineName",
14+
"type": "string",
15+
"doc": "Full name of the airline"
16+
},
17+
{
18+
"name": "country",
19+
"type": "string",
20+
"doc": "Country where the airline is based"
21+
}
22+
]
23+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"type": "record",
3+
"name": "Airport",
4+
"namespace": "io.confluent.developer.models.reference",
5+
"doc": "Schema for airport reference data",
6+
"fields": [
7+
{
8+
"name": "airportCode",
9+
"type": "string",
10+
"doc": "IATA airport code"
11+
},
12+
{
13+
"name": "airportName",
14+
"type": "string",
15+
"doc": "Full name of the airport"
16+
},
17+
{
18+
"name": "city",
19+
"type": "string",
20+
"doc": "City where the airport is located"
21+
},
22+
{
23+
"name": "country",
24+
"type": "string",
25+
"doc": "Country where the airport is located"
26+
},
27+
{
28+
"name": "latitude",
29+
"type": "double",
30+
"doc": "Latitude coordinate of the airport"
31+
},
32+
{
33+
"name": "longitude",
34+
"type": "double",
35+
"doc": "Longitude coordinate of the airport"
36+
}
37+
]
38+
}

data-generator/README.adoc

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
= Reference Data Generator 🏢
2+
:toc:
3+
:icons: font
4+
:sectnums:
5+
6+
This module generates reference data for airlines and airports and sends it to Kafka topics.
7+
The data is used by the Flink SQL module for enriching flight data.
8+
9+
== Features
10+
11+
* Generates realistic airline data with airline codes, names, and countries
12+
* Generates realistic airport data with airport codes, names, cities, countries, and locations
13+
* Sends data to Kafka topics in JSON format
14+
* Configurable via properties files or command-line arguments
15+
* Supports both local Kafka and Confluent Cloud
16+
17+
== Configuration
18+
19+
The generator can be configured using properties files or command-line arguments.
20+
21+
=== Properties Files
22+
23+
Two property files are included:
24+
25+
* `local.properties` - Configuration for local Kafka
26+
* `cloud-template.properties` - Template for Confluent Cloud configuration
27+
28+
You can also create your own properties file and specify it with the `--properties` option.
29+
30+
#### Local Properties
31+
32+
```properties
33+
# Kafka Configuration for local environment
34+
bootstrap.servers=localhost:29092
35+
36+
# Topic Configuration
37+
topic.airlines=airlines
38+
topic.airports=airports
39+
40+
# Generator Configuration
41+
generator.airlines.count=20
42+
generator.airports.count=50
43+
44+
# Environment
45+
environment=local
46+
```
47+
48+
#### Cloud Properties
49+
50+
```properties
51+
# Kafka Configuration for Confluent Cloud
52+
bootstrap.servers=${BOOTSTRAP_SERVERS}
53+
54+
# Topic Configuration
55+
topic.airlines=airlines
56+
topic.airports=airports
57+
58+
# Generator Configuration
59+
generator.airlines.count=20
60+
generator.airports.count=50
61+
62+
# Security settings
63+
security.protocol=SASL_SSL
64+
sasl.mechanism=PLAIN
65+
sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required username="${CONFLUENT_API_KEY}" password="${CONFLUENT_API_SECRET}";
66+
67+
# Environment
68+
environment=cloud
69+
```
70+
71+
=== Command-Line Options
72+
73+
The generator supports the following command-line options:
74+
75+
* `-e, --env <environment>` - Environment (local or cloud), default: local
76+
* `-p, --properties <path>` - Path to properties file
77+
* `--cloud-key <key>` - Confluent Cloud API Key (overrides properties)
78+
* `--cloud-secret <secret>` - Confluent Cloud API Secret (overrides properties)
79+
* `-b, --bootstrap-servers <servers>` - Kafka bootstrap servers (overrides properties)
80+
* `-h, --help` - Show help message
81+
82+
== Running the Generator
83+
84+
=== Using Makefile
85+
86+
The easiest way to run the generator is using the Makefile targets:
87+
88+
```bash
89+
# Run with local Kafka
90+
make run-ref-data-generator
91+
92+
# Run with Confluent Cloud
93+
make run-ref-data-generator-cloud CLOUD_KEY=<key> CLOUD_SECRET=<secret> BOOTSTRAP_SERVERS=<servers>
94+
95+
# Run with custom properties file
96+
make run-ref-data-generator-with-props PROPS=path/to/properties
97+
```
98+
99+
=== Using Gradle
100+
101+
You can also run the generator directly with Gradle:
102+
103+
```bash
104+
# Run with local Kafka
105+
./gradlew :data-generator:run --args="--env local"
106+
107+
# Run with Confluent Cloud
108+
./gradlew :data-generator:run --args="--env cloud --cloud-key <key> --cloud-secret <secret> --bootstrap-servers <servers>"
109+
110+
# Run with custom properties file
111+
./gradlew :data-generator:run --args="--properties path/to/properties"
112+
```
113+
114+
== Data Models
115+
116+
=== Airline
117+
118+
The airline model includes:
119+
120+
* `airlineCode` - IATA airline code (e.g., "AA")
121+
* `airlineName` - Airline name (e.g., "American Airlines")
122+
* `country` - Country of the airline (e.g., "United States")
123+
124+
=== Airport
125+
126+
The airport model includes:
127+
128+
* `airportCode` - IATA airport code (e.g., "JFK")
129+
* `airportName` - Airport name (e.g., "John F. Kennedy International Airport")
130+
* `city` - City (e.g., "New York")
131+
* `country` - Country (e.g., "United States")
132+
* `latitude` - Latitude coordinate
133+
* `longitude` - Longitude coordinate

0 commit comments

Comments
 (0)