File tree Expand file tree Collapse file tree 4 files changed +93
-2
lines changed
Expand file tree Collapse file tree 4 files changed +93
-2
lines changed Original file line number Diff line number Diff line change @@ -16,10 +16,10 @@ COPY requirements.txt .
1616# Install required Python packages
1717RUN pip3 install --no-cache-dir -r requirements.txt
1818
19- # Copy Spark job scripts
19+ # Copy Spark job scripts from the jobs folder
2020COPY jobs /opt/bitnami/spark/jobs/
2121
22- # Copy input files
22+ # Copy input files from the input folder
2323COPY input /opt/bitnami/spark/input/
2424
2525# Ensure correct permissions
Original file line number Diff line number Diff line change 1+ from pyspark .sql import SparkSession
2+ from pyspark .sql .functions import col
3+
# Kafka -> console Structured Streaming job.
#
# Reads records from a Kafka topic, casts each record's value to a string and
# prints every micro-batch to the console until the query terminates.
import os  # local import: only this section needs environment lookups

# Connection settings may be overridden through the environment so the same
# script runs unchanged on a laptop and inside a container. The defaults are
# the values that used to be hard-coded here.
# NOTE(review): when this job runs inside a container, "localhost" is the
# container itself - confirm the broker is actually reachable at the default.
KAFKA_BOOTSTRAP_SERVERS = os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "localhost:9092")
KAFKA_TOPIC = os.environ.get("KAFKA_TOPIC", "streaming-topic")

# Create Spark Session
# NOTE(review): the connector coordinates (Scala 2.12 / Spark 3.3.1) should
# match the Spark build this job is submitted to - confirm against the image.
spark = (
    SparkSession.builder
    .appName("KafkaSparkStreaming")
    .config(
        "spark.jars.packages",
        "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.1",
    )
    .getOrCreate()
)

# Read Stream from Kafka, starting from the earliest available offsets
df = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", KAFKA_BOOTSTRAP_SERVERS)
    .option("subscribe", KAFKA_TOPIC)
    .option("startingOffsets", "earliest")
    .load()
)

# Convert Kafka message from binary to string
df = df.selectExpr("CAST(value AS STRING)")

# Process Data (Modify as needed): append each new batch to the console sink
query = (
    df.writeStream
    .outputMode("append")
    .format("console")
    .start()
)

try:
    # Block until the streaming query stops or fails.
    query.awaitTermination()
finally:
    # Release the Spark session even if the query raised or was interrupted.
    spark.stop()
Original file line number Diff line number Diff line change 1+ version : ' 3.7'
2+
# Monitoring stack: Prometheus (metrics store), Grafana (dashboards) and
# node_exporter (machine metrics), all attached to the "monitoring" network.
services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    volumes:
      # Host path of the Prometheus config. Override with PROMETHEUS_CONFIG to
      # run on another machine; the default is the original absolute path.
      # Mounted read-only because Prometheus only reads this file.
      - "${PROMETHEUS_CONFIG:-/home/frocode/githubRepos/Realtime_Streaming_Unstructured-Data/promothues/promo/prometheus.yaml}:/etc/prometheus/prometheus.yml:ro"
    extra_hosts:
      # Makes host.docker.internal resolve to the Docker host on Linux engines
      # too (Docker Desktop already provides this name).
      - "host.docker.internal:host-gateway"
    ports:
      - "9090:9090"
    networks:
      - monitoring
    restart: unless-stopped

  grafana:
    image: grafana/grafana
    container_name: grafana
    environment:
      # Override GF_SECURITY_ADMIN_PASSWORD in the shell or an .env file; the
      # fallback "admin" keeps the previous behaviour and is for local use only.
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin}
    ports:
      - "3000:3000"
    volumes:
      # Named volume so Grafana data survives container re-creation.
      - grafana-storage:/var/lib/grafana
    depends_on:
      - prometheus
    networks:
      - monitoring
    restart: unless-stopped

  node_exporter:
    image: quay.io/prometheus/node-exporter:v1.8.2
    container_name: node_exporter
    # NOTE(review): no host directories are mounted, so /proc and /sys here are
    # the container's own views - confirm that is what should be measured.
    command: "--path.procfs=/proc --path.sysfs=/sys"
    restart: unless-stopped
    ports:
      - "9100:9100"
    networks:
      - monitoring

networks:
  monitoring:
    driver: bridge

volumes:
  grafana-storage:
    driver: local
Original file line number Diff line number Diff line change 1+ global :
2+ scrape_interval : 5s # Adjust as needed
3+
# Scrape targets. Host names are resolved from inside the Prometheus container.
scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Machine metrics, addressed by the node_exporter host name on port 9100.
  - job_name: 'node_exporter'
    static_configs:
      - targets: ['node_exporter:9100']

  # Docker engine metrics exposed on the Docker host at port 9323.
  - job_name: 'docker'
    static_configs:
      # host.docker.internal resolves out of the box on Docker Desktop
      # (Mac/Windows). On Linux, map it for the Prometheus container with
      # `extra_hosts: ["host.docker.internal:host-gateway"]`.
      - targets: ['host.docker.internal:9323']
      # Do not use 'localhost:9323' here: when Prometheus runs in a container,
      # localhost is that container itself, not the Docker host.
You can’t perform that action at this time.
0 commit comments