Skip to content

Commit d0d482b

Browse files
authored
Merge pull request #221 from safarijv/commitByDefault
Add ability to declare Solr object with default commit policy
2 parents fcbf73e + f8080ec commit d0d482b

File tree

3 files changed

+118
-27
lines changed

3 files changed

+118
-27
lines changed

README.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,24 @@ If your Solr servers run off https
193193
solr = pysolr.SolrCloud(zookeeper, "collection", verify=path/to/cert.perm)
194194
195195
196+
Custom Commit Policy
197+
~~~~~~~~~~~~~~~~~~~~
198+
199+
.. code-block:: python
200+
201+
# Setup a Solr instance. The trailing slash is optional.
202+
# All requests to Solr will result in a commit
203+
solr = pysolr.Solr('http://localhost:8983/solr/core_0/', search_handler='/autocomplete', always_commit=True)
204+
205+
``always_commit`` signals to the Solr object to either commit or not commit by default for any solr request.
206+
Be sure to set this to ``True`` if you are upgrading from a version where the default policy was to always commit.
207+
208+
Functions like ``add`` and ``delete`` also still provide a way to override the default by passing the ``commit`` kwarg.
209+
210+
It is generally good practice to limit the number of commits to Solr.
211+
Excessive commits risk opening too many searchers or using too many system resources.
212+
213+
196214

197215
LICENSE
198216
=======

pysolr.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,8 @@ class Solr(object):
318318
solr = pysolr.Solr('http://localhost:8983/solr', results_cls=dict)
319319
320320
"""
321-
def __init__(self, url, decoder=None, timeout=60, results_cls=Results, search_handler='select', use_qt_param=False,
321+
322+
def __init__(self, url, decoder=None, timeout=60, results_cls=Results, search_handler='select', use_qt_param=False, always_commit=False,
322323
auth=None, verify=True):
323324
self.decoder = decoder or json.JSONDecoder()
324325
self.url = url
@@ -330,6 +331,7 @@ def __init__(self, url, decoder=None, timeout=60, results_cls=Results, search_ha
330331
self.use_qt_param = use_qt_param
331332
self.auth = auth
332333
self.verify = verify
334+
self.always_commit = always_commit
333335

334336
def get_session(self):
335337
if self.session is None:
@@ -447,7 +449,7 @@ def _mlt(self, params, handler='mlt'):
447449
def _suggest_terms(self, params, handler='terms'):
448450
return self._select(params, handler)
449451

450-
def _update(self, message, clean_ctrl_chars=True, commit=True, softCommit=False, waitFlush=None, waitSearcher=None,
452+
def _update(self, message, clean_ctrl_chars=True, commit=None, softCommit=False, waitFlush=None, waitSearcher=None,
451453
overwrite=None, handler='update'):
452454
"""
453455
Posts the given xml message to http://<self.url>/update and
@@ -471,6 +473,9 @@ def _update(self, message, clean_ctrl_chars=True, commit=True, softCommit=False,
471473

472474
path = '%s/' % path_handler
473475

476+
if commit is None:
477+
commit = self.always_commit
478+
474479
if commit:
475480
query_vars.append('commit=%s' % str(bool(commit)).lower())
476481
elif softCommit:
@@ -854,15 +859,15 @@ def _build_doc(self, doc, boost=None, fieldUpdates=None):
854859

855860
return doc_elem
856861

857-
def add(self, docs, boost=None, fieldUpdates=None, commit=True, softCommit=False, commitWithin=None, waitFlush=None,
862+
def add(self, docs, boost=None, fieldUpdates=None, commit=None, softCommit=False, commitWithin=None, waitFlush=None,
858863
waitSearcher=None, overwrite=None, handler='update'):
859864
"""
860865
Adds or updates documents.
861866
862867
Requires ``docs``, which is a list of dictionaries. Each key is the
863868
field name and each value is the value to index.
864869
865-
Optionally accepts ``commit``. Default is ``True``.
870+
Optionally accepts ``commit``. Default is ``None``, which means the instance's ``always_commit`` setting is used.
866871
867872
Optionally accepts ``softCommit``. Default is ``False``.
868873
@@ -912,7 +917,7 @@ def add(self, docs, boost=None, fieldUpdates=None, commit=True, softCommit=False
912917
return self._update(m, commit=commit, softCommit=softCommit, waitFlush=waitFlush, waitSearcher=waitSearcher,
913918
overwrite=overwrite, handler=handler)
914919

915-
def delete(self, id=None, q=None, commit=True, softCommit=False, waitFlush=None, waitSearcher=None, handler='update'):
920+
def delete(self, id=None, q=None, commit=None, softCommit=False, waitFlush=None, waitSearcher=None, handler='update'):
916921
"""
917922
Deletes documents.
918923

tests/test_client.py

Lines changed: 90 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import unittest
77
from io import StringIO
88
from xml.etree import ElementTree
9+
import random
910

1011
from pysolr import (NESTED_DOC_KEY, Results, Solr, SolrError, clean_xml_string,
1112
force_bytes, force_unicode, json, safe_urlencode, sanitize,
@@ -137,7 +138,12 @@ def test_iter(self):
137138
self.assertEqual(to_iter[2], {'id': 3})
138139

139140

140-
class SolrTestCase(unittest.TestCase):
141+
class SolrTestCaseMixin(object):
142+
def get_solr(self, collection, timeout=60, always_commit=False):
143+
return Solr('http://localhost:8983/solr/%s' % collection, timeout=timeout, always_commit=always_commit)
144+
145+
146+
class SolrTestCase(unittest.TestCase, SolrTestCaseMixin):
141147
def setUp(self):
142148
super(SolrTestCase, self).setUp()
143149
self.solr = self.get_solr("core0")
@@ -219,12 +225,12 @@ def setUp(self):
219225
]
220226

221227
# Clear it.
222-
self.solr.delete(q='*:*')
228+
self.solr.delete(q='*:*', commit=True)
223229

224230
# Index our docs. Yes, this leans on functionality we're going to test
225231
# later & if it's broken, everything will catastrophically fail.
226232
# Such is life.
227-
self.solr.add(self.docs)
233+
self.solr.add(self.docs, commit=True)
228234

229235
# Mock the _send_request method on the solr instance so that we can
230236
# test that custom handlers are called correctly.
@@ -236,16 +242,17 @@ def assertURLStartsWith(self, URL, path):
236242
# slash handling are caught quickly:
237243
return self.assertEqual(URL, '%s/%s' % (self.solr.url.replace('/core0', ''), path))
238244

239-
def get_solr(self, collection, timeout=60):
240-
return Solr('http://localhost:8983/solr/%s' % collection, timeout=timeout)
245+
def get_solr(self, collection, timeout=60, always_commit=False):
246+
return Solr('http://localhost:8983/solr/%s' % collection, timeout=timeout, always_commit=always_commit)
241247

242248
def test_init(self):
243249
self.assertEqual(self.solr.url, 'http://localhost:8983/solr/core0')
244250
self.assertTrue(isinstance(self.solr.decoder, json.JSONDecoder))
245251
self.assertEqual(self.solr.timeout, 60)
246252

247-
custom_solr = self.get_solr("core0", timeout=17)
253+
custom_solr = self.get_solr("core0", timeout=17, always_commit=True)
248254
self.assertEqual(custom_solr.timeout, 17)
255+
self.assertEqual(custom_solr.always_commit, True)
249256

250257
def test_custom_results_class(self):
251258
solr = Solr('http://localhost:8983/solr/core0', results_cls=dict)
@@ -607,7 +614,7 @@ def test_add(self):
607614
'id': 'doc_7',
608615
'title': 'Another example doc',
609616
},
610-
])
617+
], commit=True)
611618
# add should default to 'update' handler
612619
args, kwargs = self.solr._send_request.call_args
613620
self.assertTrue(args[1].startswith('update/?'))
@@ -617,7 +624,7 @@ def test_add(self):
617624

618625
# add should support custom handlers
619626
with self.assertRaises(SolrError):
620-
self.solr.add([], handler='fakehandler')
627+
self.solr.add([], handler='fakehandler', commit=True)
621628
args, kwargs = self.solr._send_request.call_args
622629
self.assertTrue(args[1].startswith('fakehandler'))
623630

@@ -628,7 +635,7 @@ def test_add_with_boost(self):
628635
boost={'title': 10.0})
629636

630637
self.solr.add([{'id': 'doc_7', 'title': 'Spam doc doc'}],
631-
boost={'title': 0})
638+
boost={'title': 0}, commit=True)
632639

633640
res = self.solr.search('doc')
634641
self.assertEqual(len(res), 5)
@@ -640,7 +647,7 @@ def test_field_update_inc(self):
640647
updateList = []
641648
for i, doc in enumerate(originalDocs):
642649
updateList.append({'id': doc['id'], 'popularity': 5})
643-
self.solr.add(updateList, fieldUpdates={'popularity': 'inc'})
650+
self.solr.add(updateList, fieldUpdates={'popularity': 'inc'}, commit=True)
644651

645652
updatedDocs = self.solr.search('doc')
646653
self.assertEqual(len(updatedDocs), 3)
@@ -658,7 +665,7 @@ def test_field_update_set(self):
658665
updateList = []
659666
for i, doc in enumerate(originalDocs):
660667
updateList.append({'id': doc['id'], 'popularity': updated_popularity})
661-
self.solr.add(updateList, fieldUpdates={'popularity': 'set'})
668+
self.solr.add(updateList, fieldUpdates={'popularity': 'set'}, commit=True)
662669

663670
updatedDocs = self.solr.search('doc')
664671
self.assertEqual(len(updatedDocs), 3)
@@ -681,14 +688,14 @@ def test_field_update_add(self):
681688
'title': 'Multivalued doc 2',
682689
'word_ss': ['charlie', 'delta'],
683690
},
684-
])
691+
], commit=True)
685692

686693
originalDocs = self.solr.search('multivalued')
687694
self.assertEqual(len(originalDocs), 2)
688695
updateList = []
689696
for i, doc in enumerate(originalDocs):
690697
updateList.append({'id': doc['id'], 'word_ss': ['epsilon', 'gamma']})
691-
self.solr.add(updateList, fieldUpdates={'word_ss': 'add'})
698+
self.solr.add(updateList, fieldUpdates={'word_ss': 'add'}, commit=True)
692699

693700
updatedDocs = self.solr.search('multivalued')
694701
self.assertEqual(len(updatedDocs), 2)
@@ -701,7 +708,7 @@ def test_field_update_add(self):
701708

702709
def test_delete(self):
703710
self.assertEqual(len(self.solr.search('doc')), 3)
704-
self.solr.delete(id='doc_1')
711+
self.solr.delete(id='doc_1', commit=True)
705712
# delete should default to 'update' handler
706713
args, kwargs = self.solr._send_request.call_args
707714
self.assertTrue(args[1].startswith('update/?'))
@@ -711,16 +718,16 @@ def test_delete(self):
711718
self.assertEqual(len(self.solr.search('type_s:child')), 3)
712719
self.assertEqual(len(self.solr.search('type_s:grandchild')), 1)
713720
self.solr.delete(q='price:[0 TO 15]')
714-
self.solr.delete(q='type_s:parent')
721+
self.solr.delete(q='type_s:parent', commit=True)
715722
# one simple doc should remain
716723
# parent documents were also deleted but children remain as orphans
717724
self.assertEqual(len(self.solr.search('doc')), 1)
718725
self.assertEqual(len(self.solr.search('type_s:parent')), 0)
719726
self.assertEqual(len(self.solr.search('type_s:child')), 3)
720-
self.solr.delete(q='type_s:child OR type_s:grandchild')
727+
self.solr.delete(q='type_s:child OR type_s:grandchild', commit=True)
721728

722729
self.assertEqual(len(self.solr.search('*:*')), 1)
723-
self.solr.delete(q='*:*')
730+
self.solr.delete(q='*:*', commit=True)
724731
self.assertEqual(len(self.solr.search('*:*')), 0)
725732

726733
# Test delete() with `id' being a list.
@@ -739,12 +746,12 @@ def leaf_doc(doc):
739746
self.assertEqual(len(self.solr.search(leaf_q)), len(to_delete_docs))
740747
# Extract a random doc from the list, to later check it wasn't deleted.
741748
graced_doc_id = to_delete_ids.pop(random.randint(0, len(to_delete_ids) - 1))
742-
self.solr.delete(id=to_delete_ids)
749+
self.solr.delete(id=to_delete_ids, commit=True)
743750
# There should be only one left, our graced id
744751
self.assertEqual(len(self.solr.search(leaf_q)), 1)
745752
self.assertEqual(len(self.solr.search('id:%s' % graced_doc_id)), 1)
746753
# Now we can wipe the graced document too. None should be left.
747-
self.solr.delete(id=graced_doc_id)
754+
self.solr.delete(id=graced_doc_id, commit=True)
748755
self.assertEqual(len(self.solr.search(leaf_q)), 0)
749756

750757
# Can't delete when the list of documents is empty
@@ -758,7 +765,7 @@ def leaf_doc(doc):
758765

759766
# delete should support custom handlers
760767
with self.assertRaises(SolrError):
761-
self.solr.delete(id='doc_1', handler='fakehandler')
768+
self.solr.delete(id='doc_1', handler='fakehandler', commit=True)
762769
args, kwargs = self.solr._send_request.call_args
763770
self.assertTrue(args[1].startswith('fakehandler'))
764771

@@ -769,14 +776,29 @@ def test_commit(self):
769776
'id': 'doc_6',
770777
'title': 'Newly added doc',
771778
}
772-
], commit=False)
779+
])
773780
self.assertEqual(len(self.solr.search('doc')), 3)
774781
self.solr.commit()
775782
# commit should default to 'update' handler
776783
args, kwargs = self.solr._send_request.call_args
777784
self.assertTrue(args[1].startswith('update/?'))
778785
self.assertEqual(len(self.solr.search('doc')), 4)
779786

787+
def test_can_handles_default_commit_policy(self):
788+
expected_commits = [False, True, False]
789+
commit_arg = [False, True, None]
790+
791+
for expected_commit, arg in zip(expected_commits, commit_arg):
792+
self.solr.add([
793+
{
794+
'id': 'doc_6',
795+
'title': 'Newly added doc',
796+
}
797+
], commit=arg)
798+
args, _ = self.solr._send_request.call_args
799+
committing_in_url = 'commit' in args[1]
800+
self.assertEqual(expected_commit, committing_in_url)
801+
780802
def test_overwrite(self):
781803
self.assertEqual(len(self.solr.search('id:doc_overwrite_1')), 0)
782804
self.solr.add([
@@ -788,7 +810,7 @@ def test_overwrite(self):
788810
'id': 'doc_overwrite_1',
789811
'title': 'Kim is more awesome.',
790812
}
791-
], overwrite=False)
813+
], overwrite=False, commit=True)
792814
self.assertEqual(len(self.solr.search('id:doc_overwrite_1')), 2)
793815

794816
# commit should support custom handlers
@@ -941,3 +963,49 @@ def test_request_handler(self):
941963
# reset the values to what they were before the test
942964
self.solr.use_qt_param = before_test_use_qt_param
943965
self.solr.search_handler = before_test_search_handler
966+
967+
968+
class SolrCommitByDefaultTestCase(unittest.TestCase, SolrTestCaseMixin):
969+
970+
def setUp(self):
971+
super(SolrCommitByDefaultTestCase, self).setUp()
972+
self.solr = self.get_solr("core0", always_commit=True)
973+
self.docs = [
974+
{
975+
'id': 'doc_1',
976+
'title': 'Newly added doc',
977+
},
978+
{
979+
'id': 'doc_2',
980+
'title': 'Another example doc',
981+
},
982+
]
983+
984+
def test_does_not_require_commit(self):
985+
# add should not require commit arg
986+
self.solr.add(self.docs)
987+
988+
self.assertEqual(len(self.solr.search('doc')), 2)
989+
self.assertEqual(len(self.solr.search('example')), 1)
990+
991+
# update should not require commit arg
992+
self.docs[0]['title'] = "Updated Doc"
993+
self.docs[1]['title'] = "Another example updated doc"
994+
self.solr.add(self.docs, fieldUpdates={'title': 'set'})
995+
self.assertEqual(len(self.solr.search('updated')), 2)
996+
self.assertEqual(len(self.solr.search('example')), 1)
997+
998+
# delete should not require commit arg
999+
self.solr.delete(q='*:*')
1000+
self.assertEqual(len(self.solr.search('*')), 0)
1001+
1002+
def test_can_handles_default_commit_policy(self):
1003+
self.solr._send_request = Mock(wraps=self.solr._send_request)
1004+
expected_commits = [False, True, True]
1005+
commit_arg = [False, True, None]
1006+
1007+
for expected_commit, arg in zip(expected_commits, commit_arg):
1008+
self.solr.add(self.docs, commit=arg)
1009+
args, _ = self.solr._send_request.call_args
1010+
committing_in_url = 'commit' in args[1]
1011+
self.assertEqual(expected_commit, committing_in_url)

0 commit comments

Comments
 (0)