Skip to content

Commit 65378d4

Browse files
authored
Consolidate PRs into single branch (#219)
* Support xor_value in returned strings.

Extend the tuple that represents an instance of a match to include the xor key. This breaks all existing scripts that unpack the tuple, which I'm not very happy with.

This also updates the submodule to use the latest master so that I can get the new xor key values.

Also, add a fix to get yara building here by defining BUCKETS_128 and CHECKSUM_1B, as needed by the new tlsh code (discussed with @metthal).

* Add two new objects to yara-python.

Add a StringMatch object, which represents a matched string. It has an identifier member (this is the string identifier, e.g. $a) and an instances member, which contains a list of matched string instances. It also keeps track of the string flags internally but does not expose them directly, as the string flags contain things that are internal to YARA (e.g. STRING_FLAGS_FITS_IN_ATOM). The reason it keeps track of the string modifiers is so that it can be extended to allow users to take action based upon certain flags. For example, there is an "is_xor()" member on StringMatch which returns True if the string uses the xor modifier. This way users can call another method (discussed below) to get the plaintext string back.

Add a StringMatchInstance object, which represents an instance of a matched string. It contains the offset, the matched data and the xor key used to match the string (this is ALWAYS set, even to 0 if the string is not an xor string). There is a "plaintext()" method on StringMatchInstance objects which returns a new bytes object with the xor key applied. This allows users to do something like this:

```
print(instance.plaintext() if string.is_xor() else instance.matched_data)
```

Technically, the plaintext() method will return the matched_data if the xor_key is 0, so they don't need the conditional, but this gives them a nice way to know whether the xor_key is worth recording along with the plaintext.
I decided not to implement richcompare for these new objects, as it isn't entirely clear what I would want to do the comparison on.

* Add "matched_length" member.

Add a "matched_length" member to match instances. This is useful when the "matched_data" member is a subset of the actually matched data.

Add a test for this that sets the max_match_data config to 2 and then checks to make sure the "matched_length" and "matched_data" members are correct.

* Add modules list to yara object.

Add support for getting the list of available modules. It is available just by accessing the yara.modules attribute, which contains a list of available modules.

>>> print('\n'.join(yara.modules))
tests
pe
elf
math
time
console
>>>

Note: This commit also brings in the necessary defines to build the authenticode parser, which is also done in the xor_value branch. Also, this commit updates the yara submodule, which will likely overwrite the changes done in the xor_value branch, so I recommend updating the submodule after both are merged.

* Update yara to 65feab41d4cbf4a75338561d8506fc1fa9fa6ba6.

* Fix test using \t in a regex.

* Fix build on Windows in appveyor.

* Actually fix appveyor builds on windows?
1 parent 42ccdd3 commit 65378d4

File tree

5 files changed

+531
-16
lines changed

5 files changed

+531
-16
lines changed

appveyor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ build_script:
155155
- "%CMD_IN_ENV% python setup.py build_ext --enable-cuckoo --enable-dotnet
156156
-L../jansson-%JANSSON_VERSION%/build/lib/Release;../openssl/lib
157157
-I../jansson-%JANSSON_VERSION%/build/include;../openssl/include
158-
-DHASH_MODULE,HAVE_LIBCRYPTO
158+
-DHASH_MODULE,HAVE_LIBCRYPTO,BUCKETS_128,CHECKSUM_1B
159159
-llibcrypto"
160160

161161
after_build:

setup.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,12 @@ def run(self):
188188

189189
exclusions = []
190190

191+
# Needed to build tlsh
192+
module.define_macros.extend([('BUCKETS_128', 1), ('CHECKSUM_1B', 1)])
193+
194+
# Needed to build authenticode parser
195+
module.libraries.append('ssl')
196+
191197
for define in self.define or []:
192198
module.define_macros.append(define)
193199

@@ -371,4 +377,5 @@ def run(self):
371377
ext_modules=[Extension(
372378
name='yara',
373379
include_dirs=['yara/libyara/include', 'yara/libyara/', '.'],
380+
define_macros=[('BUCKETS_128', 1), ('CHECKSUM_1B', 1)],
374381
sources=['yara-python.c'])])

tests.py

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,12 @@ def runReTest(self, test):
306306
matches = rule.match(data=string)
307307
if expected_result == SUCCEED:
308308
self.assertTrue(matches)
309-
_, _, matching_string = matches[0].strings[0]
309+
matching_string = matches[0].strings[0]
310+
instance = matching_string.instances[0]
310311
if sys.version_info[0] >= 3:
311-
self.assertTrue(matching_string == bytes(test[3], 'utf-8'))
312+
self.assertTrue(instance.matched_data == bytes(test[3], 'utf-8'))
312313
else:
313-
self.assertTrue(matching_string == test[3])
314+
self.assertTrue(instance.matched_data == test[3])
314315
else:
315316
self.assertFalse(matches)
316317

@@ -559,9 +560,13 @@ def testHexStrings(self):
559560
matches = rules.match(data='abbb')
560561

561562
if sys.version_info[0] >= 3:
562-
self.assertTrue(matches[0].strings == [(0, '$a', bytes('ab', 'utf-8'))])
563+
self.assertTrue(matches[0].strings[0].identifier == '$a')
564+
self.assertTrue(matches[0].strings[0].instances[0].offset == 0)
565+
self.assertTrue(matches[0].strings[0].instances[0].matched_data == bytes('ab', 'utf-8'))
563566
else:
564-
self.assertTrue(matches[0].strings == [(0, '$a', 'ab')])
567+
self.assertTrue(matches[0].strings[0].identifier == '$a')
568+
self.assertTrue(matches[0].strings[0].instances[0].offset == 0)
569+
self.assertTrue(matches[0].strings[0].instances[0].matched_data == 'ab')
565570

566571
def testCount(self):
567572

@@ -650,6 +655,58 @@ def testFor(self):
650655
'rule test { strings: $a = "ssi" condition: for all i in (1..#a) : (@a[i] == 5) }',
651656
], 'mississipi')
652657

658+
def testXorKey(self):
    # Check that each instance of an xor string reports the key it was
    # matched with, and that plaintext() undoes that key.

    global rule_data
    rule_data = None

    def callback(data):
        # Keep the callback payload so the assertions below can inspect it.
        global rule_data
        rule_data = data
        return yara.CALLBACK_CONTINUE

    # 'etllx' is "dummy" XORed with 1 and 'fwoo{' is "dummy" XORed with 2,
    # so the scanned data holds one occurrence for each key in xor(1-2).
    r = yara.compile(source='rule test { strings: $a = "dummy" xor(1-2) condition: $a }')
    r.match(data='etllxfwoo{', callback=callback)

    self.assertTrue(rule_data['matches'])
    self.assertEqual(rule_data['rule'], 'test')
    self.assertEqual(len(rule_data['strings']), 1)
    string = rule_data['strings'][0]
    self.assertEqual(len(string.instances), 2)
    self.assertEqual(string.instances[0].xor_key, 1)
    self.assertEqual(string.instances[1].xor_key, 2)

    # Make sure plaintext() works for every key. assertEqual (rather than
    # assertTrue on an == expression) shows both values when it fails.
    self.assertEqual(string.instances[0].plaintext(), b'dummy')
    self.assertEqual(string.instances[1].plaintext(), b'dummy')
681+
682+
# Test that the xor_key for matched strings is 0 if the string is not an xor
683+
# string. We always want to make sure this is set!
684+
def testXorKeyNoXorString(self):
    # A string declared without the xor modifier must still expose an
    # xor_key on its instances, and that key must be 0.

    global rule_data
    rule_data = None

    def callback(data):
        # Record the payload handed to the callback for later inspection.
        global rule_data
        rule_data = data
        return yara.CALLBACK_CONTINUE

    source = 'rule test { strings: $a = "dummy" condition: $a }'
    rules = yara.compile(source=source)
    rules.match(data='dummy', callback=callback)

    self.assertTrue(rule_data['matches'])
    self.assertEqual(rule_data['rule'], 'test')

    matched_strings = rule_data['strings']
    self.assertEqual(len(matched_strings), 1)

    first_instance = matched_strings[0].instances[0]
    self.assertEqual(first_instance.xor_key, 0)
701+
702+
def testMatchedLength(self):
703+
yara.set_config(max_match_data=2)
704+
r = yara.compile(source='rule test { strings: $a = "dummy" condition: $a }')
705+
matches = r.match(data='dummy')
706+
self.assertEqual(matches[0].strings[0].instances[0].matched_length, 5)
707+
self.assertEqual(matches[0].strings[0].instances[0].matched_data, b'du')
708+
yara.set_config(max_match_data=512)
709+
653710
def testRE(self):
654711

655712
self.assertTrueRules([
@@ -661,8 +718,8 @@ def testRE(self):
661718
'rule test { strings: $a = /(M|N)iss/ nocase condition: $a }',
662719
'rule test { strings: $a = /[M-N]iss/ nocase condition: $a }',
663720
'rule test { strings: $a = /(Mi|ssi)ssippi/ nocase condition: $a }',
664-
'rule test { strings: $a = /ppi\tmi/ condition: $a }',
665-
r'rule test { strings: $a = /ppi\.mi/ condition: $a }',
721+
r'rule test { strings: $a = /ppi\tmi/ condition: $a }',
722+
'rule test { strings: $a = /ppi\.mi/ condition: $a }',
666723
'rule test { strings: $a = /^mississippi/ fullword condition: $a }',
667724
'rule test { strings: $a = /mississippi.*mississippi$/s condition: $a }',
668725
], 'mississippi\tmississippi.mississippi\nmississippi')

yara

Submodule yara updated 60 files

0 commit comments

Comments
 (0)