18 changes: 9 additions & 9 deletions poetry.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -8,7 +8,7 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.12"
-boto3 = "^1.39.12"
+boto3 = "^1.40.11"
 constructs = "^10.4.2"
 pytest = "^8.4.1"
 aws-cdk-lib = "^2.206.0"
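Note: the only dependency change is the boto3 bump from ^1.39.12 to ^1.40.11. A throwaway check like the sketch below (illustrative, not part of this PR) can confirm a deployed bundle actually picked up the new SDK:

import boto3

# Parse "major.minor.patch" and compare numerically; a plain string
# comparison would mis-order versions such as 1.4.x vs 1.40.x.
major, minor, patch = (int(part) for part in boto3.__version__.split('.')[:3])
assert (major, minor, patch) >= (1, 40, 11), boto3.__version__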
73 changes: 44 additions & 29 deletions src/functions/chunk_manifest_builder/main.py
@@ -25,15 +25,18 @@ def handler(event, context):
         response = table.get_item(Key={'id': interview_id})
 
         if 'Item' not in response:
-            raise Exception(f"Interview transcription not found: {interview_id}")
+            raise Exception(
+                f"Interview transcription not found: {interview_id}")
 
         interview_data = response['Item']
-        position_name = interview_data['position_name']  # Get from DynamoDB record
+        # Get from DynamoDB record
+        position_name = interview_data['position_name']
         position_description = interview_data['position_description']
 
         print(f"Retrieved from DynamoDB - position_name: {position_name}")
 
-        # Get full utterances from S3 (stored there due to DynamoDB size limits)
+        # Get full utterances from S3 (stored there due to DynamoDB size
+        # limits)
         s3 = boto3.client('s3')
         utterances_key = f"utterances/{interview_id}_utterances.json"
 
@@ -54,10 +57,13 @@ def handler(event, context):
             print(f"Using default bucket name: {bucket_name}")
 
         try:
-            utterances_response = s3.get_object(Bucket=bucket_name, Key=utterances_key)
-            utterances = json.loads(utterances_response['Body'].read().decode('utf-8'))
+            utterances_response = s3.get_object(
+                Bucket=bucket_name, Key=utterances_key)
+            utterances = json.loads(
+                utterances_response['Body'].read().decode('utf-8'))
         except s3.exceptions.NoSuchKey:
-            raise Exception(f"Utterances file not found: s3://{bucket_name}/{utterances_key}")
+            raise Exception(
+                f"Utterances file not found: s3://{bucket_name}/{utterances_key}")
         except Exception as e:
             raise Exception(f"Error reading utterances from S3: {str(e)}")
 
@@ -74,25 +80,23 @@ def handler(event, context):
 
         # Update processing status
         table.update_item(
-            Key={'id': interview_id},
+            Key={
+                'id': interview_id},
             UpdateExpression='SET processing_status = :status, chunk_count = :count',
             ExpressionAttributeValues={
                 ':status': 'chunked',
-                ':count': len(chunks)
-            }
-        )
-
-        return {
-            'statusCode': 200,
-            'interview_id': interview_id,
-            'position_name': position_name,
-            'position_description': position_description,
-            'bucket_name': bucket_name,
-            'chunk_count': len(chunks),
-            'total_duration_ms': chunks[-1]['end_ms'] if chunks else 0,
-            'chunks': [{'chunk_index': c['chunk_index'], 'start_ms': c['start_ms'], 'end_ms': c['end_ms']} for c in
-                       chunks]
-        }
+                ':count': len(chunks)})
+
+        return {'statusCode': 200,
+                'interview_id': interview_id,
+                'position_name': position_name,
+                'position_description': position_description,
+                'bucket_name': bucket_name,
+                'chunk_count': len(chunks),
+                'total_duration_ms': chunks[-1]['end_ms'] if chunks else 0,
+                'chunks': [{'chunk_index': c['chunk_index'],
+                            'start_ms': c['start_ms'],
+                            'end_ms': c['end_ms']} for c in chunks]}
 
     except Exception as e:
         print(f"Error building chunk manifest: {str(e)}")
@@ -135,7 +139,8 @@ def build_chunks(utterances, position_name, interview_id):
         # Check if we should end this chunk
         if chunk_duration >= TARGET_CHUNK_DURATION_MS:
             # Try to find a natural break point (end of candidate answer)
-            break_point = find_natural_break_point(utterances, current_idx, chunk_start_time, MAX_CHUNK_DURATION_MS)
+            break_point = find_natural_break_point(
+                utterances, current_idx, chunk_start_time, MAX_CHUNK_DURATION_MS)
             if break_point != -1:
                 # Adjust chunk to natural break
                 chunk_utterances = utterances[current_start_idx:break_point + 1]
@@ -148,13 +153,16 @@ def build_chunks(utterances, position_name, interview_id):
 
         # Ensure minimum chunk size unless it's the last chunk
         if len(chunk_utterances) > 0:
-            chunk_duration = chunk_utterances[-1]['end_time_ms'] - chunk_start_time
-            if chunk_duration < MIN_CHUNK_DURATION_MS and current_idx < len(utterances) - 1:
+            chunk_duration = chunk_utterances[-1]['end_time_ms'] - \
+                chunk_start_time
+            if chunk_duration < MIN_CHUNK_DURATION_MS and current_idx < len(
+                    utterances) - 1:
                 # Extend chunk to minimum duration
                 while current_idx < len(utterances):
                     utterance = utterances[current_idx]
                     chunk_utterances.append(utterance)
-                    chunk_duration = utterance['end_time_ms'] - chunk_start_time
+                    chunk_duration = utterance['end_time_ms'] - \
+                        chunk_start_time
                     if chunk_duration >= MIN_CHUNK_DURATION_MS:
                         break
                     current_idx += 1
@@ -202,13 +210,19 @@ def build_chunks(utterances, position_name, interview_id):
     return chunks
 
 
-def find_natural_break_point(utterances, current_idx, chunk_start_time, max_duration_ms):
+def find_natural_break_point(
+        utterances,
+        current_idx,
+        chunk_start_time,
+        max_duration_ms):
     """
     Find a natural break point (end of candidate answer) within the maximum duration.
     """
 
     # Look ahead for natural break (candidate finishing an answer)
-    for i in range(current_idx, min(len(utterances), current_idx + 20)):  # Look at next 20 utterances
+    for i in range(
+            current_idx, min(
+                len(utterances), current_idx + 20)):  # Look at next 20 utterances
         utterance = utterances[i]
 
         # Check if we're past max duration
@@ -232,7 +246,8 @@ def build_chunk_text(chunk_utterances):
     lines = []
     for utterance in chunk_utterances:
         timestamp = format_timestamp(utterance['start_time_ms'])
-        lines.append(f"[{timestamp}] {utterance['speaker']}: {utterance['text']}")
+        lines.append(
+            f"[{timestamp}] {utterance['speaker']}: {utterance['text']}")
 
     return '\n'.join(lines)
 
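Aside on the build_chunk_text hunk above: a minimal driver (illustrative, not part of this PR) showing the transcript layout it emits, assuming format_timestamp renders elapsed milliseconds as mm:ss. The utterance fields match the ones this diff reads (speaker, text, start_time_ms, end_time_ms):

# Hypothetical sample utterances; the Russian text mirrors the interviews
# this pipeline processes.
sample_utterances = [
    {'speaker': 'Interviewer', 'text': 'Расскажите о своём опыте.',
     'start_time_ms': 0, 'end_time_ms': 4000},
    {'speaker': 'Candidate', 'text': 'Я восемь лет пишу на Python.',
     'start_time_ms': 4500, 'end_time_ms': 12000},
]

print(build_chunk_text(sample_utterances))
# Expected output (under the mm:ss assumption):
# [00:00] Interviewer: Расскажите о своём опыте.
# [00:04] Candidate: Я восемь лет пишу на Python.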
20 changes: 9 additions & 11 deletions src/functions/chunked_qa_extractor/main.py
@@ -66,13 +66,12 @@ def handler(event, context):
         # Update chunk processing status
         chunk_table = dynamodb.Table('interview_chunks')
         chunk_table.update_item(
-            Key={'id': chunk_id},
+            Key={
+                'id': chunk_id},
             UpdateExpression='SET processing_status = :status, qa_count = :count',
             ExpressionAttributeValues={
                 ':status': 'qa_extracted',
-                ':count': saved_count
-            }
-        )
+                ':count': saved_count})
 
         return {
             'statusCode': 200,
@@ -88,13 +87,12 @@ def handler(event, context):
         # Update chunk with error status
         chunk_table = dynamodb.Table('interview_chunks')
         chunk_table.update_item(
-            Key={'id': chunk_id},
+            Key={
+                'id': chunk_id},
             UpdateExpression='SET processing_status = :status, error_message = :error',
             ExpressionAttributeValues={
                 ':status': 'extraction_failed',
-                ':error': str(e)
-            }
-        )
+                ':error': str(e)})
         raise
 
 
@@ -112,17 +110,17 @@ def extract_qa_from_chunk(bedrock_client, chunk_data):
     model_id = inference_profile_arn
 
     # Build the extraction prompt
-    system_prompt = """You are an expert at extracting structured Q&A data from interview transcripts. 
+    system_prompt = """You are an expert at extracting structured Q&A data from interview transcripts.
 You must return ONLY valid JSON with no additional text or explanation."""
 
-    user_prompt = f"""You'll receive a transcript segment with speakers and timestamps. 
+    user_prompt = f"""You'll receive a transcript segment with speakers and timestamps.
 Return ONLY valid JSON in this exact format:
 {{
   "qa": [
     {{
       "index": 0,
       "q_text": "Question text from interviewer",
-      "a_text": "Complete answer text from candidate", 
+      "a_text": "Complete answer text from candidate",
       "a_start_ms": 123000,
       "a_end_ms": 456000,
       "confidence": "high"
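Since the prompt above promises "ONLY valid JSON", a small validation helper (a sketch under that assumption, not code from this PR) can keep malformed model replies from propagating; the field names mirror the format in user_prompt:

import json

def parse_qa_response(raw_text):
    """Parse the extractor's JSON reply into a list of Q&A dicts."""
    data = json.loads(raw_text)  # raises ValueError on non-JSON replies
    pairs = []
    for item in data.get('qa', []):
        pairs.append({
            'index': item['index'],
            'q_text': item['q_text'],
            'a_text': item['a_text'],
            'a_start_ms': item['a_start_ms'],
            'a_end_ms': item['a_end_ms'],
            # Treat a missing confidence field as the weakest signal.
            'confidence': item.get('confidence', 'low'),
        })
    return pairs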
17 changes: 9 additions & 8 deletions src/functions/qa_extractor/main.py
@@ -70,16 +70,16 @@ def extract_qa_pairs(bedrock_client, transcript):
     """
 
     prompt = f"""
-Please analyze the following Russian interview transcript and extract all question-answer pairs. 
+Please analyze the following Russian interview transcript and extract all question-answer pairs.
 
 The transcript contains dialogue between an Interviewer and a Candidate, marked with "Interviewer:" and "Candidate:" prefixes.
 The conversation is in Russian language.
 
 Extract each question asked by the interviewer and the corresponding answer given by the candidate.
 
 Return the result as a JSON array where each object has "question" and "answer" fields.
 Keep the original Russian text in both question and answer fields.
 
 Rules:
 1. Only extract complete question-answer pairs
 2. Questions should be from the Interviewer
@@ -88,10 +88,10 @@ def extract_qa_pairs(bedrock_client, transcript):
 5. Skip small talk, introductions, and closing remarks
 6. Focus on technical questions and substantial answers
 7. Preserve the original Russian text - do not translate
 
 Transcript:
 {transcript}
 
 Return only the JSON array, no additional text or explanation.
     """
 
@@ -110,7 +110,8 @@ def extract_qa_pairs(bedrock_client, transcript):
     # Get the inference profile ARN from environment variables
     inference_profile_arn = os.environ.get('BEDROCK_INFERENCE_PROFILE_ARN')
     if not inference_profile_arn:
-        raise Exception("BEDROCK_INFERENCE_PROFILE_ARN environment variable not set")
+        raise Exception(
+            "BEDROCK_INFERENCE_PROFILE_ARN environment variable not set")
 
     response = bedrock_client.invoke_model(
         modelId=inference_profile_arn,
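For context on the wrapped guard above: one way the surrounding call might fit together, shown as a hedged sketch rather than the project's actual code. The invoke_model call and the BEDROCK_INFERENCE_PROFILE_ARN variable come from the diff; the Anthropic-style request body is an assumption about the model behind the inference profile:

import json
import os

import boto3

bedrock_client = boto3.client('bedrock-runtime')

inference_profile_arn = os.environ.get('BEDROCK_INFERENCE_PROFILE_ARN')
if not inference_profile_arn:
    raise Exception(
        "BEDROCK_INFERENCE_PROFILE_ARN environment variable not set")

prompt = "..."  # built from the transcript, as in extract_qa_pairs above

response = bedrock_client.invoke_model(
    modelId=inference_profile_arn,
    body=json.dumps({
        'anthropic_version': 'bedrock-2023-05-31',  # assumed body schema
        'max_tokens': 4096,
        'messages': [{'role': 'user', 'content': prompt}],
    }))
result = json.loads(response['body'].read())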
3 changes: 2 additions & 1 deletion src/functions/s3_ingest_handler/main.py
@@ -19,7 +19,8 @@ def handler(event, context):
         bucket = record['s3']['bucket']['name']
         key = unquote_plus(record['s3']['object']['key'])
 
-        # Extract position_name from path (e.g., "python_senior/1.m4a" -> "python_senior")
+        # Extract position_name from path (e.g., "python_senior/1.m4a" ->
+        # "python_senior")
         path_parts = key.split('/')
         if len(path_parts) < 2:
             print(f"Invalid path structure: {key}")
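The rewrapped comment describes the bucket layout this handler assumes: the first path segment names the position. In isolation (illustrative values, not from the PR):

key = "python_senior/1.m4a"        # hypothetical object key
path_parts = key.split('/')
if len(path_parts) >= 2:
    position_name = path_parts[0]  # -> "python_senior"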