Skip to content

Commit 8757284

Browse files
authored
Merge a0d1682 into 751a49b
2 parents 751a49b + a0d1682 commit 8757284

File tree

7 files changed

+311
-0
lines changed

7 files changed

+311
-0
lines changed

src/.funcignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.git*
2+
.vscode
3+
__azurite_db*__.json
4+
__blobstorage__
5+
__queuestorage__
6+
local.settings.json
7+
test
8+
.venv

src/.gitignore

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
pip-wheel-metadata/
24+
share/python-wheels/
25+
*.egg-info/
26+
.installed.cfg
27+
*.egg
28+
MANIFEST
29+
30+
# PyInstaller
31+
# Usually these files are written by a python script from a template
32+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
33+
*.manifest
34+
*.spec
35+
36+
# Installer logs
37+
pip-log.txt
38+
pip-delete-this-directory.txt
39+
40+
# Unit test / coverage reports
41+
htmlcov/
42+
.tox/
43+
.nox/
44+
.coverage
45+
.coverage.*
46+
.cache
47+
nosetests.xml
48+
coverage.xml
49+
*.cover
50+
.hypothesis/
51+
.pytest_cache/
52+
53+
# Translations
54+
*.mo
55+
*.pot
56+
57+
# Django stuff:
58+
*.log
59+
local_settings.py
60+
db.sqlite3
61+
62+
# Flask stuff:
63+
instance/
64+
.webassets-cache
65+
66+
# Scrapy stuff:
67+
.scrapy
68+
69+
# Sphinx documentation
70+
docs/_build/
71+
72+
# PyBuilder
73+
target/
74+
75+
# Jupyter Notebook
76+
.ipynb_checkpoints
77+
78+
# IPython
79+
profile_default/
80+
ipython_config.py
81+
82+
# pyenv
83+
.python-version
84+
85+
# pipenv
86+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
87+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
88+
# having no cross-platform support, pipenv may install dependencies that don’t work, or not
89+
# install all needed dependencies.
90+
#Pipfile.lock
91+
92+
# celery beat schedule file
93+
celerybeat-schedule
94+
95+
# SageMath parsed files
96+
*.sage.py
97+
98+
# Environments
99+
.env
100+
.venv
101+
env/
102+
venv/
103+
ENV/
104+
env.bak/
105+
venv.bak/
106+
107+
# Spyder project settings
108+
.spyderproject
109+
.spyproject
110+
111+
# Rope project settings
112+
.ropeproject
113+
114+
# mkdocs documentation
115+
/site
116+
117+
# mypy
118+
.mypy_cache/
119+
.dmypy.json
120+
dmypy.json
121+
122+
# Pyre type checker
123+
.pyre/
124+
125+
# Azure Functions artifacts
126+
bin
127+
obj
128+
appsettings.json
129+
local.settings.json
130+
131+
# Azurite artifacts
132+
__blobstorage__
133+
__queuestorage__
134+
__azurite_db*__.json
135+
.python_packages

src/.vscode/extensions.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"recommendations": [
3+
"ms-azuretools.vscode-azurefunctions",
4+
"ms-python.python"
5+
]
6+
}

src/.vscode/launch.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"version": "0.2.0",
3+
"configurations": [
4+
{
5+
"name": "Attach to Python Functions",
6+
"type": "debugpy",
7+
"request": "attach",
8+
"connect": {
9+
"host": "localhost",
10+
"port": 9091
11+
},
12+
"preLaunchTask": "func: host start"
13+
}
14+
]
15+
}

src/function_app.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import azure.functions as func
2+
import logging
3+
import json
4+
import os
5+
import uuid
6+
import io
7+
from pdfminer.high_level import extract_text
8+
from azure.cosmos import CosmosClient, PartitionKey
9+
10+
app = func.FunctionApp(http_auth_level=func.AuthLevel.FUNCTION)
11+
12+
def read_pdf_content(myblob):
    """Return the text extracted from a PDF blob.

    Args:
        myblob: Object exposing ``read()``; the call's result is wrapped in
            an ``io.BytesIO`` stream.

    Returns:
        Whatever ``extract_text`` produces for that in-memory stream.
    """
    # Buffer the whole blob in memory and hand the stream to extract_text.
    return extract_text(io.BytesIO(myblob.read()))
20+
21+
def extract_invoice_data(text):
    """Parse the plain text of an invoice into a dictionary.

    The text is scanned line by line for three markers:

    * ``"BILL TO:"`` - the next three lines become ``customer_name``,
      ``customer_email`` and ``customer_address``.
    * ``"Company Information:"`` - the next three lines become
      ``company_name``, ``company_phone`` and ``company_address``.
    * ``"Rental Date"`` - every following line up to the first blank line is
      parsed as one rental row.

    A header that appears too close to the end of the text yields empty
    strings for the missing lines, and rental rows that are too short to
    parse are skipped with a warning, so one malformed section no longer
    aborts the whole invoice with an ``IndexError``.

    Args:
        text: Invoice text with lines separated by ``"\\n"``.

    Returns:
        dict with keys ``id`` (from ``generate_id()``), the six
        customer/company string fields, and ``rentals`` (a list of dicts).
    """
    lines = text.split('\n')
    invoice_data = {
        "id": generate_id(),
        "customer_name": "",
        "customer_email": "",
        "customer_address": "",
        "company_name": "",
        "company_phone": "",
        "company_address": "",
        "rentals": [],
    }

    for i, line in enumerate(lines):
        if "BILL TO:" in line:
            invoice_data["customer_name"] = _line_after(lines, i, 1)
            invoice_data["customer_email"] = _line_after(lines, i, 2)
            invoice_data["customer_address"] = _line_after(lines, i, 3)
        elif "Company Information:" in line:
            invoice_data["company_name"] = _line_after(lines, i, 1)
            invoice_data["company_phone"] = _line_after(lines, i, 2)
            invoice_data["company_address"] = _line_after(lines, i, 3)
        elif "Rental Date" in line:
            invoice_data["rentals"].extend(_parse_rental_rows(lines[i + 1:]))

    logging.info("Successfully extracted invoice data.")
    return invoice_data


def _line_after(lines, index, offset):
    """Return the stripped line ``offset`` below ``index``, or "" if absent."""
    position = index + offset
    return lines[position].strip() if position < len(lines) else ""


def _parse_rental_rows(rows):
    """Parse whitespace-separated rental rows up to the first blank line."""
    # A row needs at least: date, description, quantity, total price.
    # The title is whatever lies between the date and the last three tokens
    # and may therefore be empty.
    min_tokens = 4
    rentals = []
    for row in rows:
        if row.strip() == "":
            break  # a blank line terminates the rental table
        tokens = row.split()
        if len(tokens) < min_tokens:
            logging.warning(
                "Skipping rental row with %d field(s); expected at least %d.",
                len(tokens), min_tokens,
            )
            continue
        rentals.append({
            "rental_date": tokens[0],
            "title": " ".join(tokens[1:-3]),
            "description": tokens[-3],
            "quantity": tokens[-2],
            "total_price": tokens[-1],
        })
    return rentals
63+
64+
def save_invoice_data_to_cosmos(invoice_data, blob_name):
    """Upsert one invoice item into the Cosmos DB ``Invoices`` container.

    The endpoint and key are read from the ``COSMOS_DB_ENDPOINT`` and
    ``COSMOS_DB_KEY`` environment variables. The ``ContosoDBAIDemo`` database
    and ``Invoices`` container are created when they do not exist yet.
    Failures are logged and reported through the return value; no exception
    is propagated to the caller.

    Args:
        invoice_data: Item passed unchanged to ``container.upsert_item``.
        blob_name: Name of the source blob; used only in failure log messages.

    Returns:
        ``True`` when the item was upserted, ``False`` when configuration is
        missing or any Cosmos DB step failed.
    """
    endpoint = os.getenv("COSMOS_DB_ENDPOINT")
    key = os.getenv("COSMOS_DB_KEY")

    # Fail fast with an actionable message instead of an opaque SDK error.
    missing = [
        name
        for name, value in (("COSMOS_DB_ENDPOINT", endpoint),
                            ("COSMOS_DB_KEY", key))
        if not value
    ]
    if missing:
        logging.error(
            "Cannot save invoice from %s: missing setting(s) %s",
            blob_name, ", ".join(missing),
        )
        return False

    try:
        client = CosmosClient(endpoint, key)
        logging.info("Successfully connected to Cosmos DB.")
    except Exception as e:
        logging.error(f"Error connecting to Cosmos DB: {e}")
        return False

    database_name = 'ContosoDBAIDemo'
    container_name = 'Invoices'

    try:
        database = client.create_database_if_not_exists(id=database_name)
        # NOTE(review): the items built by extract_invoice_data carry no
        # "invoice_number" field, so presumably every item ends up under the
        # same (undefined) partition key value - confirm this is intended.
        container = database.create_container_if_not_exists(
            id=container_name,
            partition_key=PartitionKey(path="/invoice_number"),
            offer_throughput=400
        )
        logging.info("Successfully ensured database and container exist.")
    except Exception as e:
        logging.error(f"Error creating database or container: {e}")
        return False

    try:
        response = container.upsert_item(invoice_data)
        logging.info(f"Saved processed invoice data to Cosmos DB: {response}")
    except Exception as e:
        logging.error(
            f"Error inserting item into Cosmos DB: {e} (blob: {blob_name})"
        )
        return False

    return True
94+
95+
def generate_id():
    """Return a newly generated random UUID (``uuid.uuid4``) as a string."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
97+
98+
@app.blob_trigger(arg_name="myblob", path="pdfinvoices/{name}",
                  connection="contosostorageaidemo_STORAGE")
def BlobTriggerContosoPDFInvoicesRaw(myblob: func.InputStream):
    """Process one PDF invoice blob from the ``pdfinvoices`` path.

    Steps: extract the PDF text, parse it into invoice data, and save the
    result to Cosmos DB. A failure in any step is logged (with traceback) and
    ends processing; no exception is propagated out of this function.

    Args:
        myblob: The triggering blob; its ``name`` and ``length`` are logged
            and its content is read by ``read_pdf_content``.
    """
    logging.info(f"Python blob trigger function processed blob\n"
                 f"Name: {myblob.name}\n"
                 f"Blob Size: {myblob.length} bytes")

    try:
        text = read_pdf_content(myblob)
        logging.info("Successfully read and extracted text from PDF.")
    except Exception as e:
        logging.exception("Error reading PDF: %s", e)
        return

    # The raw text contains customer details, so keep the payload at DEBUG
    # and only report its size at INFO.
    logging.info("Extracted %d characters of text from PDF.", len(text))
    logging.debug("Extracted text from PDF: %s", text)

    try:
        invoice_data = extract_invoice_data(text)
    except Exception as e:
        logging.exception("Error extracting invoice data: %s", e)
        return

    # Same reasoning as above: full invoice content only at DEBUG.
    logging.info("Extracted invoice data with %d rental row(s).",
                 len(invoice_data.get("rentals", [])))
    logging.debug("Extracted invoice data: %s", invoice_data)

    try:
        saved = save_invoice_data_to_cosmos(invoice_data, myblob.name)
    except Exception as e:
        logging.exception("Error saving invoice data to Cosmos DB: %s", e)
        return

    # The helper catches its own exceptions, so an explicit False result is
    # treated as a failure report; any other result keeps the success log.
    if saved is False:
        logging.error("Invoice data from %s was not saved to Cosmos DB.",
                      myblob.name)
    else:
        logging.info("Successfully saved invoice data to Cosmos DB.")

src/host.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"version": "2.0",
3+
"logging": {
4+
"applicationInsights": {
5+
"samplingSettings": {
6+
"isEnabled": true,
7+
"excludedTypes": "Request"
8+
}
9+
}
10+
},
11+
"extensionBundle": {
12+
"id": "Microsoft.Azure.Functions.ExtensionBundle",
13+
"version": "[4.*, 5.0.0)"
14+
}
15+
}

src/requirements.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# DO NOT include azure-functions-worker in this file
2+
# The Python Worker is managed by Azure Functions platform
3+
# Manually managing azure-functions-worker may cause unexpected issues
4+
5+
azure-functions
6+
pdfminer.six
7+
azure-cosmos==4.3.0

0 commit comments

Comments
 (0)