Reviewed patch: Airflow "names_insertion" POC (DAG, docker-compose stack, helper scripts).

Changes relative to the submitted patch:
- dags/names_insertion_dag.py: import BashOperator from airflow.operators.bash
  (the bash_operator module is deprecated), use `schedule` instead of the
  deprecated `schedule_interval`, drop the no-op trailing expression.
- docker-compose.yml: pin the postgres and apache/airflow image versions.
- py_scripts/namaste_world.py: write the `date` column declared in the CSV
  header, open the file append-only with newline="", add a __main__ guard.
- scripts/entrypoint.sh: fail fast if the DB migration fails, allow the admin
  password to be overridden, exec the scheduler.

The `index` lines of the edited files were removed because their blob hashes
no longer match; regenerate the patch with `git diff` to restore them.

diff --git a/.gitignore b/.gitignore
index 6769e21..864451f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -157,4 +157,6 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
\ No newline at end of file
+#.idea/
+
+postgres-data/
\ No newline at end of file
diff --git a/dags/names_insertion_dag.py b/dags/names_insertion_dag.py
new file mode 100644
--- /dev/null
+++ b/dags/names_insertion_dag.py
@@ -0,0 +1,25 @@
+"""Airflow DAG that runs namaste_world.py every three minutes."""
+
+import pendulum
+from airflow.models.dag import DAG
+from airflow.operators.bash import BashOperator
+
+# Set the timezone to Indian Standard Time (IST)
+local_tz = pendulum.timezone("Asia/Kolkata")
+
+dag = DAG(
+    "names_insertion",
+    # `schedule` replaces the deprecated `schedule_interval` argument.
+    schedule="*/3 * * * *",
+    start_date=pendulum.datetime(2024, 3, 10, tz=local_tz),
+    catchup=False,
+    tags=["POC", "POC1"],
+)
+
+# The script path is where docker-compose mounts ./py_scripts in the container.
+update_csv = BashOperator(
+    task_id="update_csv",
+    bash_command="python /opt/airflow/py_scripts/namaste_world.py",
+    dag=dag,
+    retries=3,
+)
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,47 @@
+version: "3.8"
+services:
+  postgres:
+    # Pin a major version so an image pull cannot change the on-disk data format.
+    image: postgres:16
+    platform: linux/arm64
+    env_file:
+      - .env
+    volumes:
+      - ./postgres-data:/var/lib/postgresql/data
+  scheduler:
+    # Pinned to the 2.x line that the DAG and the entrypoint CLI calls target.
+    image: apache/airflow:2.8.3
+    platform: linux/arm64
+    entrypoint: ./scripts/entrypoint.sh
+    deploy:
+      restart_policy:
+        condition: on-failure
+    depends_on:
+      - postgres
+    env_file:
+      - .env
+    volumes:
+      - ./dags:/opt/airflow/dags
+      - ./logs:/opt/airflow/logs
+      - ./py_scripts:/opt/airflow/py_scripts
+      - ./s3:/opt/airflow/s3
+      - ./scripts:/opt/airflow/scripts
+  webserver:
+    # Keep in lockstep with the scheduler image.
+    image: apache/airflow:2.8.3
+    platform: linux/arm64
+    deploy:
+      restart_policy:
+        condition: on-failure
+    command: webserver
+    depends_on:
+      - scheduler
+    env_file:
+      - .env
+    volumes:
+      - ./dags:/opt/airflow/dags
+      - ./logs:/opt/airflow/logs
+      - ./s3:/opt/airflow/s3
+      - ./py_scripts:/opt/airflow/py_scripts
+    ports:
+      - "8080:8080"
diff --git a/logs/scheduler/latest b/logs/scheduler/latest
new file mode 120000
index 0000000..aacf429
--- /dev/null
+++ b/logs/scheduler/latest
@@ -0,0 +1 @@
+2024-03-11
\ No newline at end of file
diff --git a/py_scripts/namaste_world.py b/py_scripts/namaste_world.py
new file mode 100644
--- /dev/null
+++ b/py_scripts/namaste_world.py
@@ -0,0 +1,23 @@
+"""Append a randomly chosen player name and today's date to best_player.csv."""
+
+import csv
+import random
+from datetime import date
+
+CSV_PATH = "/opt/airflow/s3/raw_data/best_player.csv"
+PLAYERS = ("Virat Kohli", "Rohit Sharma")
+
+
+def append_random_player(path=CSV_PATH):
+    """Append one ``player_name, date`` row to *path* and return that row."""
+    row = [random.choice(PLAYERS), date.today().isoformat()]
+    # newline="" is the mode the csv module documents for files it writes.
+    with open(path, "a", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f, delimiter=",", quotechar="'", quoting=csv.QUOTE_MINIMAL)
+        writer.writerow(row)
+    return row
+
+
+if __name__ == "__main__":
+    append_random_player()
+    print("Name Inserted")
diff --git a/s3/raw_data/best_player.csv b/s3/raw_data/best_player.csv
new file mode 100644
index 0000000..15ddf45
--- /dev/null
+++ b/s3/raw_data/best_player.csv
@@ -0,0 +1,7 @@
+player_name, date
+Virat Kohli, 2024-01-01
+Rohit Sharma, 2024-01-02
+Virat Kohli, 2024-01-03
+Virat Kohli
+Virat Kohli
+Virat Kohli
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/scripts/entrypoint.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Scheduler entrypoint: migrate the metadata DB, create the admin user,
+# then run the Airflow scheduler.
+
+# Stop at the first failing step instead of starting the scheduler against
+# a database that was never migrated.
+set -euo pipefail
+
+airflow db migrate
+
+# The default password is for local POC use only; set AIRFLOW_ADMIN_PASSWORD
+# (for example in .env) to override it. A failure here is reported but does
+# not stop the scheduler from starting.
+airflow users create -r Admin -u admin -e admin@example.com -f admin -l user \
+    -p "${AIRFLOW_ADMIN_PASSWORD:-admin1234}" \
+    || echo "WARNING: could not create the admin user; continuing" >&2
+
+# exec makes the scheduler the container's main process so it receives
+# stop signals directly.
+exec airflow scheduler