Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions tests/test_fingerprinting.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,3 +234,29 @@ def test_snippets_similarity(self, regen=False):
)
expected_results_loc = self.get_test_loc("snippet-similarity-expected.json")
check_against_expected_json_file(results, expected_results_loc, regen=regen)

def test_snippets_similarity_2(self, regen=False):
# index-modified.js is index.js with a function removed
test_file1 = self.get_test_loc("snippets/index.js")
test_file2 = self.get_test_loc("snippets/index-modified.js")
results1 = get_file_fingerprint_hashes(test_file1, include_ngrams=True)
results2 = get_file_fingerprint_hashes(test_file2, include_ngrams=True)
results1_snippets = results1.get("snippets")
results2_snippets = results2.get("snippets")

results1_snippet_mappings_by_snippets = self._create_snippet_mappings_by_snippets(
results1_snippets
)
results2_snippet_mappings_by_snippets = self._create_snippet_mappings_by_snippets(results2_snippets)

matching_snippets = (
results1_snippet_mappings_by_snippets.keys() & results2_snippet_mappings_by_snippets.keys()
)

# jaccard coefficient
jc = len(matching_snippets) / ((len(results1_snippets) + len(results2_snippets)) / 2)

assert jc == 0.9666666666666667
assert len(results1_snippets) == 61
assert len(results2_snippets) == 59
assert len(matching_snippets) == 58
90 changes: 90 additions & 0 deletions tests/testfiles/fingerprinting/snippets/index-modified.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
var pSlice = Array.prototype.slice;
var objectKeys = require('./lib/keys.js');
var isArguments = require('./lib/is_arguments.js');

var deepEqual = module.exports = function (actual, expected, opts) {
if (!opts) opts = {};
// 7.1. All identical values are equivalent, as determined by ===.
if (actual === expected) {
return true;

} else if (actual instanceof Date && expected instanceof Date) {
return actual.getTime() === expected.getTime();

// 7.3. Other pairs that do not both pass typeof value == 'object',
// equivalence is determined by ==.
} else if (!actual || !expected || typeof actual != 'object' && typeof expected != 'object') {
return opts.strict ? actual === expected : actual == expected;

// 7.4. For all other Object pairs, including Array objects, equivalence is
// determined by having the same number of owned properties (as verified
// with Object.prototype.hasOwnProperty.call), the same set of keys
// (although not necessarily the same order), equivalent values for every
// corresponding key, and an identical 'prototype' property. Note: this
// accounts for both named and indexed properties on Arrays.
} else {
return objEquiv(actual, expected, opts);
}
}

function isBuffer (x) {
if (!x || typeof x !== 'object' || typeof x.length !== 'number') return false;
if (typeof x.copy !== 'function' || typeof x.slice !== 'function') {
return false;
}
if (x.length > 0 && typeof x[0] !== 'number') return false;
return true;
}

function objEquiv(a, b, opts) {
var i, key;
if (isUndefinedOrNull(a) || isUndefinedOrNull(b))
return false;
// an identical 'prototype' property.
if (a.prototype !== b.prototype) return false;
//~~~I've managed to break Object.keys through screwy arguments passing.
// Converting to array solves the problem.
if (isArguments(a)) {
if (!isArguments(b)) {
return false;
}
a = pSlice.call(a);
b = pSlice.call(b);
return deepEqual(a, b, opts);
}
if (isBuffer(a)) {
if (!isBuffer(b)) {
return false;
}
if (a.length !== b.length) return false;
for (i = 0; i < a.length; i++) {
if (a[i] !== b[i]) return false;
}
return true;
}
try {
var ka = objectKeys(a),
kb = objectKeys(b);
} catch (e) {//happens when one is a string literal and the other isn't
return false;
}
// having the same number of owned properties (keys incorporates
// hasOwnProperty)
if (ka.length != kb.length)
return false;
//the same set of keys (although not necessarily the same order),
ka.sort();
kb.sort();
//~~~cheap key test
for (i = ka.length - 1; i >= 0; i--) {
if (ka[i] != kb[i])
return false;
}
//equivalent values for every corresponding key, and
//~~~possibly expensive deep test
for (i = ka.length - 1; i >= 0; i--) {
key = ka[i];
if (!deepEqual(a[key], b[key], opts)) return false;
}
return typeof a === typeof b;
}
94 changes: 94 additions & 0 deletions tests/testfiles/fingerprinting/snippets/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
var pSlice = Array.prototype.slice;
var objectKeys = require('./lib/keys.js');
var isArguments = require('./lib/is_arguments.js');

var deepEqual = module.exports = function (actual, expected, opts) {
if (!opts) opts = {};
// 7.1. All identical values are equivalent, as determined by ===.
if (actual === expected) {
return true;

} else if (actual instanceof Date && expected instanceof Date) {
return actual.getTime() === expected.getTime();

// 7.3. Other pairs that do not both pass typeof value == 'object',
// equivalence is determined by ==.
} else if (!actual || !expected || typeof actual != 'object' && typeof expected != 'object') {
return opts.strict ? actual === expected : actual == expected;

// 7.4. For all other Object pairs, including Array objects, equivalence is
// determined by having the same number of owned properties (as verified
// with Object.prototype.hasOwnProperty.call), the same set of keys
// (although not necessarily the same order), equivalent values for every
// corresponding key, and an identical 'prototype' property. Note: this
// accounts for both named and indexed properties on Arrays.
} else {
return objEquiv(actual, expected, opts);
}
}

function isUndefinedOrNull(value) {
return value === null || value === undefined;
}

function isBuffer (x) {
if (!x || typeof x !== 'object' || typeof x.length !== 'number') return false;
if (typeof x.copy !== 'function' || typeof x.slice !== 'function') {
return false;
}
if (x.length > 0 && typeof x[0] !== 'number') return false;
return true;
}

function objEquiv(a, b, opts) {
var i, key;
if (isUndefinedOrNull(a) || isUndefinedOrNull(b))
return false;
// an identical 'prototype' property.
if (a.prototype !== b.prototype) return false;
//~~~I've managed to break Object.keys through screwy arguments passing.
// Converting to array solves the problem.
if (isArguments(a)) {
if (!isArguments(b)) {
return false;
}
a = pSlice.call(a);
b = pSlice.call(b);
return deepEqual(a, b, opts);
}
if (isBuffer(a)) {
if (!isBuffer(b)) {
return false;
}
if (a.length !== b.length) return false;
for (i = 0; i < a.length; i++) {
if (a[i] !== b[i]) return false;
}
return true;
}
try {
var ka = objectKeys(a),
kb = objectKeys(b);
} catch (e) {//happens when one is a string literal and the other isn't
return false;
}
// having the same number of owned properties (keys incorporates
// hasOwnProperty)
if (ka.length != kb.length)
return false;
//the same set of keys (although not necessarily the same order),
ka.sort();
kb.sort();
//~~~cheap key test
for (i = ka.length - 1; i >= 0; i--) {
if (ka[i] != kb[i])
return false;
}
//equivalent values for every corresponding key, and
//~~~possibly expensive deep test
for (i = ka.length - 1; i >= 0; i--) {
key = ka[i];
if (!deepEqual(a[key], b[key], opts)) return false;
}
return typeof a === typeof b;
}