Skip to content

Commit f4d82b5

Browse files
committed
++
1 parent c84cd47 commit f4d82b5

File tree

2 files changed

+22
-12
lines changed

2 files changed

+22
-12
lines changed

docker/Dockerfile

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,29 @@
# syntax=docker/dockerfile:1

# Use the Bitnami Spark image as the base.
# NOTE(review): ':latest' is not reproducible — pin a specific tag
# (e.g. bitnami/spark:3.5.1) once the target Spark version is confirmed.
FROM bitnami/spark:latest

# Switch to root to install OS-level dependencies
USER root

# Install Python and pip.
# update + install + cache cleanup happen in ONE layer so the apt
# package lists are never baked into the image (hadolint DL3009/DL3015).
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Set working directory (created automatically if missing)
WORKDIR /opt/bitnami/spark

# Copy only the dependency manifest first so the pip-install layer
# stays cached until requirements.txt actually changes.
COPY requirements.txt .

# Install required Python packages (no pip cache left in the layer)
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy Spark job scripts and input files, owned by the Bitnami
# non-root user (UID 1001) — replaces the previous blanket
# 'chmod -R 777' with least-privilege ownership.
COPY --chown=1001:root jobs /opt/bitnami/spark/jobs/
COPY --chown=1001:root input /opt/bitnami/spark/input/

# Drop back to the image's default non-root user instead of
# leaving the container running as root.
USER 1001

# Default command to submit the Spark job (exec form: spark-submit
# runs as PID 1 and receives signals from 'docker stop').
CMD ["spark-submit", "--master", "spark://spark-master:7077", "/opt/bitnami/spark/jobs/main.py"]

jenkins/pipeline.jenkins

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ pipeline {
2424
stage('Build Spark Job Docker Image') {
2525
steps {
2626
script {
27-
// Copy requirements.txt to the Docker directory
27+
// Copy necessary files into the Docker directory
2828
sh '''
2929
cp Realtime_Streaming_Unstructured-Data/requirements.txt Realtime_Streaming_Unstructured-Data/docker/
30+
cp -r Realtime_Streaming_Unstructured-Data/input Realtime_Streaming_Unstructured-Data/docker/
3031
cd Realtime_Streaming_Unstructured-Data/docker
3132
sudo docker build -t spark-job -f Dockerfile .
3233
'''
@@ -49,8 +50,9 @@ pipeline {
4950
stage('Copy Input Files') {
5051
steps {
5152
script {
52-
// Copy any necessary input files to the Spark input directory
53+
// Ensure Spark master is running before copying files
5354
sh '''
55+
until sudo docker ps | grep -q spark-master; do sleep 5; done
5456
sudo docker cp Realtime_Streaming_Unstructured-Data/input/input_file.txt spark-master:/opt/bitnami/spark/input/
5557
'''
5658
}
@@ -62,7 +64,7 @@ pipeline {
6264
script {
6365
// Submit the Spark job
6466
sh '''
65-
sudo docker exec spark-job spark-submit --master spark://spark-master:7077 /opt/bitnami/spark/jobs/main.py
67+
sudo docker exec spark-master spark-submit --master spark://spark-master:7077 /opt/bitnami/spark/jobs/main.py
6668
'''
6769
}
6870
}
@@ -71,7 +73,6 @@ pipeline {
7173

7274
post {
7375
always {
74-
// Optionally stop and remove containers after the build
7576
script {
7677
sh '''
7778
cd Realtime_Streaming_Unstructured-Data/docker

0 commit comments

Comments
 (0)