aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Conrad Meyer <cem@FreeBSD.org>, 2019-04-04 23:32:27 +0000
committer: Conrad Meyer <cem@FreeBSD.org>, 2019-04-04 23:32:27 +0000
commit: fff4eaebbff9cf9084e0cb4634fb9170e3cfb252 (patch)
tree: 2e88390777de5a4e55c0e4ead9a72cb4a40777ba
parent: 02c8dd7d72a4f1b274a3f1efe94142b6319f2887 (diff)
sort(1): randomcoll: Skip the memory allocation entirely
There's no reason to order based on strcmp of ASCII digests instead of memcmp of the raw digests.

While here, remove collision fallback. If you collide two MD5s, they're probably the same string anyway. If robustness against MD5 collisions is desired, maybe we shouldn't use MD5.

None of the behavior of sort -R is specified by POSIX, so we're free to implement this however we like. E.g., using a 128-bit counter and block cipher to generate unique indices for each line of input.

PR: 230792 (2/many)
Relnotes: This will change the sort order for a given dataset with a given seed. Other similarly breaking changes are planned.
Sponsored by: Dell EMC Isilon
Notes
Notes: svn path=/head/; revision=345896
-rw-r--r-- usr.bin/sort/coll.c 21
1 file changed, 6 insertions, 15 deletions
diff --git a/usr.bin/sort/coll.c b/usr.bin/sort/coll.c
index 5b2dc07ec28e..979ba0d2c0ea 100644
--- a/usr.bin/sort/coll.c
+++ b/usr.bin/sort/coll.c
@@ -990,8 +990,7 @@ randomcoll(struct key_value *kv1, struct key_value *kv2,
{
struct bwstring *s1, *s2;
MD5_CTX ctx1, ctx2;
- char *b1, *b2;
- int cmp_res;
+ unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
s1 = kv1->k;
s2 = kv2->k;
@@ -1004,24 +1003,16 @@ randomcoll(struct key_value *kv1, struct key_value *kv2,
if (s1 == s2)
return (0);
- memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
- memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
+ memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
+ memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
- b1 = MD5End(&ctx1, NULL);
- b2 = MD5End(&ctx2, NULL);
- if (b1 == NULL || b2 == NULL)
- err(2, "MD5End");
- cmp_res = strcmp(b1,b2);
- sort_free(b1);
- sort_free(b2);
+ MD5Final(hash1, &ctx1);
+ MD5Final(hash2, &ctx2);
- if (!cmp_res)
- cmp_res = bwscoll(s1, s2, 0);
-
- return (cmp_res);
+ return (memcmp(hash1, hash2, sizeof(hash1)));
}
/*