File tree Expand file tree Collapse file tree 1 file changed +7
-3
lines changed
Expand file tree Collapse file tree 1 file changed +7
-3
lines changed Original file line number Diff line number Diff line change 171171 },
172172 {
173173 "cell_type": "code",
174- "execution_count": 7,
174+ "execution_count": null,
175175 "id": "342d2e07",
176176 "metadata": {
177177 "lines_to_next_cell": 1
178178 },
179179 "outputs": [],
180180 "source": [
181181 "def extract_contexts(texts, target_word, window=10):\n",
182- " \"\"\"Extract short context windows around target word.\"\"\"\n",
182+ " \"\"\"Extract short context windows around target word and its morphological variations.\"\"\"\n",
183183 " contexts = []\n",
184+ " # Create regex pattern to match target word and any Turkish suffixes\n",
185+ " pattern = re.compile(rf\"\\b{re.escape(target_word.lower())}\\w*\\b\")\n",
186+ " \n",
184187 " for t in texts:\n",
185188 " tokens = re.findall(r\"\\w+\", t.lower())\n",
186189 " for i, tok in enumerate(tokens):\n",
187- " if tok == target_word.lower():\n",
190+ " # Use regex to match the word and its variations\n",
191+ " if pattern.match(tok):\n",
188192 " start = max(0, i - window)\n",
189193 " end = min(len(tokens), i + window + 1)\n",
190194 " snippet = \" \".join(tokens[start:end])\n",
You can’t perform that action at this time.
0 commit comments