
Commit d52a24f

Merge pull request #34 from twitterdev/arg_handling_updates
Arg handling updates
2 parents e138bdb + 9ca6499 commit d52a24f

File tree

4 files changed: 37 additions, 22 deletions


searchtweets/api_utils.py

Lines changed: 21 additions & 9 deletions
@@ -83,7 +83,7 @@ def change_to_count_endpoint(endpoint):
     return "https://" + '/'.join(filt_tokens) + '/' + "counts.json"
 
 
-def gen_rule_payload(pt_rule, results_per_call=500,
+def gen_rule_payload(pt_rule, results_per_call=None,
                      from_date=None, to_date=None, count_bucket=None,
                      tag=None,
                      stringify=True):
@@ -93,7 +93,7 @@ def gen_rule_payload(pt_rule, results_per_call=500,
 
     Args:
         pt_rule (str): The string version of a powertrack rule,
-            e.g., "kanye west has:geo". Accepts multi-line strings
+            e.g., "beyonce has:geo". Accepts multi-line strings
             for ease of entry.
         results_per_call (int): number of tweets or counts returned per API
             call. This maps to the ``maxResults`` search API parameter.
@@ -110,14 +110,16 @@ def gen_rule_payload(pt_rule, results_per_call=500,
     Example:
 
         >>> from searchtweets.utils import gen_rule_payload
-        >>> gen_rule_payload("kanye west has:geo",
+        >>> gen_rule_payload("beyonce has:geo",
         ...                  from_date="2017-08-21",
         ...                  to_date="2017-08-22")
-        '{"query":"kanye west has:geo","maxResults":100,"toDate":"201708220000","fromDate":"201708210000"}'
+        '{"query":"beyonce has:geo","maxResults":100,"toDate":"201708220000","fromDate":"201708210000"}'
     """
 
     pt_rule = ' '.join(pt_rule.split()) # allows multi-line strings
-    payload = {"query": pt_rule, "maxResults": results_per_call}
+    payload = {"query": pt_rule}
+    if results_per_call is not None and isinstance(results_per_call, int) is True:
+        payload["maxResults"] = results_per_call
     if to_date:
         payload["toDate"] = convert_utc_time(to_date)
     if from_date:
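
With results_per_call now defaulting to None, "maxResults" is only added to the payload when an integer is actually supplied. A quick sketch of the resulting behavior, shown with stringify=False so the raw payload dict is visible (illustrative output based on the diff above, not captured from a live run):

    >>> gen_rule_payload("beyonce has:geo", stringify=False)
    {'query': 'beyonce has:geo'}
    >>> gen_rule_payload("beyonce has:geo", results_per_call=500, stringify=False)
    {'query': 'beyonce has:geo', 'maxResults': 500}
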
@@ -149,20 +151,30 @@ def gen_params_from_config(config_dict):
     else:
         endpoint = config_dict.get("endpoint")
 
+
+    def intify(arg):
+        if not isinstance(arg, int) and arg is not None:
+            return int(arg)
+        else:
+            return arg
+
+    # this parameter comes in as a string when it's parsed
+    results_per_call = intify(config_dict.get("results_per_call", None))
+
     rule = gen_rule_payload(pt_rule=config_dict["pt_rule"],
                             from_date=config_dict.get("from_date", None),
                             to_date=config_dict.get("to_date", None),
-                            results_per_call=int(config_dict.get("results_per_call")),
+                            results_per_call=results_per_call,
                             count_bucket=config_dict.get("count_bucket", None))
 
     _dict = {"endpoint": endpoint,
              "username": config_dict.get("username"),
              "password": config_dict.get("password"),
              "bearer_token": config_dict.get("bearer_token"),
              "rule_payload": rule,
-             "results_per_file": int(config_dict.get("results_per_file")),
-             "max_results": int(config_dict.get("max_results")),
-             "max_pages": config_dict.get("max_pages", None)}
+             "results_per_file": intify(config_dict.get("results_per_file")),
+             "max_results": intify(config_dict.get("max_results")),
+             "max_pages": intify(config_dict.get("max_pages", None))}
     return _dict
 
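
The intify helper is local to gen_params_from_config, so it is not importable on its own; the lines below only illustrate its coercion rules (config values arrive as strings and become ints, missing values stay None instead of raising):

    >>> intify("500")          # string parsed from the config file or CLI
    500
    >>> intify(500)            # already an int: returned unchanged
    500
    >>> intify(None) is None   # missing key: passed through, no TypeError
    True
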
searchtweets/result_stream.py

Lines changed: 1 addition & 1 deletion
@@ -155,7 +155,7 @@ class ResultStream:
     session_request_counter = 0
 
     def __init__(self, endpoint, rule_payload, username=None, password=None,
-                 bearer_token=None, max_results=1000,
+                 bearer_token=None, max_results=500,
                  tweetify=True, max_requests=None, **kwargs):
 
         self.username = username
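
Nothing else about the constructor changes; only the fallback cap drops from 1000 to 500. Assuming the package still exposes ResultStream at the top level (an assumption here, as in the project README), the new default can be confirmed without building a stream:

    >>> import inspect
    >>> from searchtweets import ResultStream
    >>> inspect.signature(ResultStream.__init__).parameters["max_results"].default
    500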

searchtweets/utils.py

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@
 
 def take(n, iterable):
     """Return first n items of the iterable as a list.
-    Originaly found in the Python itertools documentation.
+    Originally found in the Python itertools documentation.
 
     Args:
         n (int): number of items to return

tools/search_tweets.py

Lines changed: 14 additions & 11 deletions
@@ -89,16 +89,13 @@ def parse_cmd_args():
 
     argparser.add_argument("--results-per-call",
                            dest="results_per_call",
-                           default=100,
                            help="Number of results to return per call "
                                 "(default 100; max 500) - corresponds to "
                                 "'maxResults' in the API")
 
     argparser.add_argument("--max-results", dest="max_results",
-                           default=500,
                            type=int,
-                           help="Maximum number of Tweets or Counts to return for this "
-                                "session (defaults to 500)")
+                           help="Maximum number of Tweets or Counts to return for this session")
 
     argparser.add_argument("--max-pages",
                            dest="max_pages",
@@ -108,7 +105,7 @@ def parse_cmd_args():
                                 "use for this session.")
 
     argparser.add_argument("--results-per-file", dest="results_per_file",
-                           default=0,
+                           default=None,
                            type=int,
                            help="Maximum tweets to save per file.")
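
Dropping the hard-coded argparse defaults means an unspecified flag now shows up as None rather than a number. A plausible reading of the change (the merging helpers themselves are not shown in this diff) is that None-valued CLI entries can then be filtered out before merging, so values from the config file are no longer clobbered by defaults the user never typed. A minimal, self-contained sketch of that precedence idea, using plain dicts rather than the project's own dict_filter/merge_dicts:

    # Hypothetical illustration only -- not the project's own merge code.
    cli_args = {"results_per_call": None, "max_results": 1000}   # only --max-results was given
    config_file = {"results_per_call": 500, "max_results": 500}

    merged = {**config_file, **{k: v for k, v in cli_args.items() if v is not None}}
    print(merged)   # {'results_per_call': 500, 'max_results': 1000}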

@@ -137,6 +134,10 @@ def parse_cmd_args():
     return argparser
 
 
+def _filter_sensitive_args(dict_):
+    sens_args = ("password", "consumer_key", "consumer_secret", "bearer_token")
+    return {k: v for k, v in dict_.items() if k not in sens_args}
+
 def main():
     args_dict = vars(parse_cmd_args().parse_args())
     if args_dict.get("debug") is True:
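
The new _filter_sensitive_args helper simply drops credential-like keys before anything is logged. For example (assuming an ordinary dict of merged arguments):

    >>> _filter_sensitive_args({"username": "me", "password": "hunter2", "max_results": 500})
    {'username': 'me', 'max_results': 500}
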
@@ -149,8 +150,8 @@ def main():
     else:
         configfile_dict = {}
 
-    logger.debug("config file dict:")
-    logger.debug(json.dumps(configfile_dict, indent=4))
+    logger.debug("config file ({}) arguments sans sensitive args:".format(args_dict["config_filename"]))
+    logger.debug(json.dumps(_filter_sensitive_args(configfile_dict), indent=4))
 
     creds_dict = load_credentials(filename=args_dict["credential_file"],
                                   account_type=args_dict["account_type"],
@@ -163,24 +164,26 @@ def main():
                               dict_filter(args_dict),
                               dict_filter(creds_dict))
 
-    logger.debug("combined dict (cli, config, creds):")
-    logger.debug(json.dumps(config_dict, indent=4))
+    logger.debug("combined dict (cli, config, creds) sans password:")
+    logger.debug(json.dumps(_filter_sensitive_args(config_dict), indent=4))
 
     if len(dict_filter(config_dict).keys() & REQUIRED_KEYS) < len(REQUIRED_KEYS):
         print(REQUIRED_KEYS - dict_filter(config_dict).keys())
         logger.error("ERROR: not enough arguments for the program to work")
         sys.exit(1)
 
     stream_params = gen_params_from_config(config_dict)
+    logger.debug("full arguments passed to the ResultStream object sans password")
+    logger.debug(json.dumps(_filter_sensitive_args(stream_params), indent=4))
 
     rs = ResultStream(tweetify=False, **stream_params)
 
     logger.debug(str(rs))
 
     if config_dict.get("filename_prefix") is not None:
         stream = write_result_stream(rs,
-                                     filename_prefix=config_dict["filename_prefix"],
-                                     results_per_file=config_dict["results_per_file"])
+                                     filename_prefix=config_dict.get("filename_prefix"),
+                                     results_per_file=config_dict.get("results_per_file"))
     else:
         stream = rs.stream()
 