Merge pull request #965 from flowln/fat_files_in_memory

Refactor EnsureMetadataTask a bit and calculate hashes in an incremental manner
This commit is contained in:
Sefa Eyeoglu 2022-08-28 11:03:12 +02:00 committed by GitHub
commit afcd669d2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 378 additions and 183 deletions

View File

@ -494,6 +494,8 @@ set(API_SOURCES
modplatform/modrinth/ModrinthAPI.cpp modplatform/modrinth/ModrinthAPI.cpp
modplatform/helpers/NetworkModAPI.h modplatform/helpers/NetworkModAPI.h
modplatform/helpers/NetworkModAPI.cpp modplatform/helpers/NetworkModAPI.cpp
modplatform/helpers/HashUtils.h
modplatform/helpers/HashUtils.cpp
) )
set(FTB_SOURCES set(FTB_SOURCES

View File

@ -3,81 +3,73 @@
#include <MurmurHash2.h> #include <MurmurHash2.h>
#include <QDebug> #include <QDebug>
#include "FileSystem.h"
#include "Json.h" #include "Json.h"
#include "minecraft/mod/Mod.h" #include "minecraft/mod/Mod.h"
#include "minecraft/mod/tasks/LocalModUpdateTask.h" #include "minecraft/mod/tasks/LocalModUpdateTask.h"
#include "modplatform/flame/FlameAPI.h" #include "modplatform/flame/FlameAPI.h"
#include "modplatform/flame/FlameModIndex.h" #include "modplatform/flame/FlameModIndex.h"
#include "modplatform/modrinth/ModrinthAPI.h" #include "modplatform/modrinth/ModrinthAPI.h"
#include "modplatform/modrinth/ModrinthPackIndex.h" #include "modplatform/modrinth/ModrinthPackIndex.h"
#include "net/NetJob.h" #include "net/NetJob.h"
#include "tasks/MultipleOptionsTask.h"
static ModPlatform::ProviderCapabilities ProviderCaps; static ModPlatform::ProviderCapabilities ProviderCaps;
static ModrinthAPI modrinth_api; static ModrinthAPI modrinth_api;
static FlameAPI flame_api; static FlameAPI flame_api;
EnsureMetadataTask::EnsureMetadataTask(Mod* mod, QDir dir, ModPlatform::Provider prov) : Task(nullptr), m_index_dir(dir), m_provider(prov) EnsureMetadataTask::EnsureMetadataTask(Mod* mod, QDir dir, ModPlatform::Provider prov)
: Task(nullptr), m_index_dir(dir), m_provider(prov), m_hashing_task(nullptr), m_current_task(nullptr)
{ {
auto hash = getHash(mod); auto hash_task = createNewHash(mod);
if (hash.isEmpty()) if (!hash_task)
emitFail(mod); return;
else connect(hash_task.get(), &Task::succeeded, [this, hash_task, mod] { m_mods.insert(hash_task->getResult(), mod); });
m_mods.insert(hash, mod); connect(hash_task.get(), &Task::failed, [this, hash_task, mod] { emitFail(mod, "", RemoveFromList::No); });
hash_task->start();
} }
EnsureMetadataTask::EnsureMetadataTask(QList<Mod*>& mods, QDir dir, ModPlatform::Provider prov) EnsureMetadataTask::EnsureMetadataTask(QList<Mod*>& mods, QDir dir, ModPlatform::Provider prov)
: Task(nullptr), m_index_dir(dir), m_provider(prov) : Task(nullptr), m_index_dir(dir), m_provider(prov), m_current_task(nullptr)
{ {
m_hashing_task = new ConcurrentTask(this, "MakeHashesTask", 10);
for (auto* mod : mods) { for (auto* mod : mods) {
if (!mod->valid()) { auto hash_task = createNewHash(mod);
emitFail(mod); if (!hash_task)
continue; continue;
} connect(hash_task.get(), &Task::succeeded, [this, hash_task, mod] { m_mods.insert(hash_task->getResult(), mod); });
connect(hash_task.get(), &Task::failed, [this, hash_task, mod] { emitFail(mod, "", RemoveFromList::No); });
auto hash = getHash(mod); m_hashing_task->addTask(hash_task);
if (hash.isEmpty()) {
emitFail(mod);
continue;
}
m_mods.insert(hash, mod);
} }
} }
QString EnsureMetadataTask::getHash(Mod* mod) Hashing::Hasher::Ptr EnsureMetadataTask::createNewHash(Mod* mod)
{ {
/* Here we create a mapping hash -> mod, because we need that relationship to parse the API routes */ if (!mod || !mod->valid() || mod->type() == Mod::MOD_FOLDER)
QByteArray jar_data; return nullptr;
try {
jar_data = FS::read(mod->fileinfo().absoluteFilePath());
} catch (FS::FileSystemException& e) {
qCritical() << QString("Failed to open / read JAR file of %1").arg(mod->name());
qCritical() << QString("Reason: ") << e.cause();
return {}; return Hashing::createHasher(mod->fileinfo().absoluteFilePath(), m_provider);
} }
switch (m_provider) { QString EnsureMetadataTask::getExistingHash(Mod* mod)
case ModPlatform::Provider::MODRINTH: { {
auto hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first(); // Check for already computed hashes
// (linear on the number of mods vs. linear on the size of the mod's JAR)
return QString(ProviderCaps.hash(ModPlatform::Provider::MODRINTH, jar_data, hash_type).toHex()); auto it = m_mods.keyValueBegin();
} while (it != m_mods.keyValueEnd()) {
case ModPlatform::Provider::FLAME: { if ((*it).second == mod)
QByteArray jar_data_treated; break;
for (char c : jar_data) { it++;
// CF-specific
if (!(c == 9 || c == 10 || c == 13 || c == 32))
jar_data_treated.push_back(c);
} }
return QString::number(MurmurHash2(jar_data_treated, jar_data_treated.length())); // We already have the hash computed
} if (it != m_mods.keyValueEnd()) {
return (*it).first;
} }
// No existing hash
return {}; return {};
} }
@ -127,11 +119,9 @@ void EnsureMetadataTask::executeTask()
} }
auto invalidade_leftover = [this] { auto invalidade_leftover = [this] {
QMutableHashIterator<QString, Mod*> mods_iter(m_mods); for (auto mod = m_mods.constBegin(); mod != m_mods.constEnd(); mod++)
while (mods_iter.hasNext()) { emitFail(mod.value(), mod.key(), RemoveFromList::No);
auto mod = mods_iter.next(); m_mods.clear();
emitFail(mod.value());
}
emitSucceeded(); emitSucceeded();
}; };
@ -178,20 +168,44 @@ void EnsureMetadataTask::executeTask()
version_task->start(); version_task->start();
} }
void EnsureMetadataTask::emitReady(Mod* m) void EnsureMetadataTask::emitReady(Mod* m, QString key, RemoveFromList remove)
{ {
if (!m) {
qCritical() << "Tried to mark a null mod as ready.";
if (!key.isEmpty())
m_mods.remove(key);
return;
}
qDebug() << QString("Generated metadata for %1").arg(m->name()); qDebug() << QString("Generated metadata for %1").arg(m->name());
emit metadataReady(m); emit metadataReady(m);
m_mods.remove(getHash(m)); if (remove == RemoveFromList::Yes) {
if (key.isEmpty())
key = getExistingHash(m);
m_mods.remove(key);
}
} }
void EnsureMetadataTask::emitFail(Mod* m) void EnsureMetadataTask::emitFail(Mod* m, QString key, RemoveFromList remove)
{ {
if (!m) {
qCritical() << "Tried to mark a null mod as failed.";
if (!key.isEmpty())
m_mods.remove(key);
return;
}
qDebug() << QString("Failed to generate metadata for %1").arg(m->name()); qDebug() << QString("Failed to generate metadata for %1").arg(m->name());
emit metadataFailed(m); emit metadataFailed(m);
m_mods.remove(getHash(m)); if (remove == RemoveFromList::Yes) {
if (key.isEmpty())
key = getExistingHash(m);
m_mods.remove(key);
}
} }
// Modrinth // Modrinth

View File

@ -1,12 +1,14 @@
#pragma once #pragma once
#include "ModIndex.h" #include "ModIndex.h"
#include "tasks/SequentialTask.h"
#include "net/NetJob.h" #include "net/NetJob.h"
#include "modplatform/helpers/HashUtils.h"
#include "tasks/ConcurrentTask.h"
class Mod; class Mod;
class QDir; class QDir;
class MultipleOptionsTask;
class EnsureMetadataTask : public Task { class EnsureMetadataTask : public Task {
Q_OBJECT Q_OBJECT
@ -17,6 +19,8 @@ class EnsureMetadataTask : public Task {
~EnsureMetadataTask() = default; ~EnsureMetadataTask() = default;
Task::Ptr getHashingTask() { return m_hashing_task; }
public slots: public slots:
bool abort() override; bool abort() override;
protected slots: protected slots:
@ -31,10 +35,16 @@ class EnsureMetadataTask : public Task {
auto flameProjectsTask() -> NetJob::Ptr; auto flameProjectsTask() -> NetJob::Ptr;
// Helpers // Helpers
void emitReady(Mod*); enum class RemoveFromList {
void emitFail(Mod*); Yes,
No
};
void emitReady(Mod*, QString key = {}, RemoveFromList = RemoveFromList::Yes);
void emitFail(Mod*, QString key = {}, RemoveFromList = RemoveFromList::Yes);
auto getHash(Mod*) -> QString; // Hashes and stuff
auto createNewHash(Mod*) -> Hashing::Hasher::Ptr;
auto getExistingHash(Mod*) -> QString;
private slots: private slots:
void modrinthCallback(ModPlatform::IndexedPack& pack, ModPlatform::IndexedVersion& ver, Mod*); void modrinthCallback(ModPlatform::IndexedPack& pack, ModPlatform::IndexedVersion& ver, Mod*);
@ -50,5 +60,6 @@ class EnsureMetadataTask : public Task {
ModPlatform::Provider m_provider; ModPlatform::Provider m_provider;
QHash<QString, ModPlatform::IndexedVersion> m_temp_versions; QHash<QString, ModPlatform::IndexedVersion> m_temp_versions;
ConcurrentTask* m_hashing_task;
NetJob* m_current_task; NetJob* m_current_task;
}; };

View File

@ -19,6 +19,8 @@
#include "modplatform/ModIndex.h" #include "modplatform/ModIndex.h"
#include <QCryptographicHash> #include <QCryptographicHash>
#include <QDebug>
#include <QIODevice>
namespace ModPlatform { namespace ModPlatform {
@ -53,34 +55,26 @@ auto ProviderCapabilities::hashType(Provider p) -> QStringList
} }
return {}; return {};
} }
auto ProviderCapabilities::hash(Provider p, QByteArray& data, QString type) -> QByteArray
auto ProviderCapabilities::hash(Provider p, QIODevice* device, QString type) -> QString
{ {
QCryptographicHash::Algorithm algo = QCryptographicHash::Sha1;
switch (p) { switch (p) {
case Provider::MODRINTH: { case Provider::MODRINTH: {
// NOTE: Data is the result of reading the entire JAR file! algo = (type == "sha1") ? QCryptographicHash::Sha1 : QCryptographicHash::Sha512;
// If 'type' was specified, we use that
if (!type.isEmpty() && hashType(p).contains(type)) {
if (type == "sha512")
return QCryptographicHash::hash(data, QCryptographicHash::Sha512);
else if (type == "sha1")
return QCryptographicHash::hash(data, QCryptographicHash::Sha1);
}
return QCryptographicHash::hash(data, QCryptographicHash::Sha512);
}
case Provider::FLAME:
// If 'type' was specified, we use that
if (!type.isEmpty() && hashType(p).contains(type)) {
if(type == "sha1")
return QCryptographicHash::hash(data, QCryptographicHash::Sha1);
else if (type == "md5")
return QCryptographicHash::hash(data, QCryptographicHash::Md5);
}
break; break;
} }
return {}; case Provider::FLAME:
algo = (type == "sha1") ? QCryptographicHash::Sha1 : QCryptographicHash::Md5;
break;
}
QCryptographicHash hash(algo);
if(!hash.addData(device))
qCritical() << "Failed to read JAR to create hash!";
Q_ASSERT(hash.result().length() == hash.hashLength(algo));
return { hash.result().toHex() };
} }
} // namespace ModPlatform } // namespace ModPlatform

View File

@ -24,6 +24,8 @@
#include <QVariant> #include <QVariant>
#include <QVector> #include <QVector>
class QIODevice;
namespace ModPlatform { namespace ModPlatform {
enum class Provider { enum class Provider {
@ -36,7 +38,7 @@ class ProviderCapabilities {
auto name(Provider) -> const char*; auto name(Provider) -> const char*;
auto readableName(Provider) -> QString; auto readableName(Provider) -> QString;
auto hashType(Provider) -> QStringList; auto hashType(Provider) -> QStringList;
auto hash(Provider, QByteArray&, QString type = "") -> QByteArray; auto hash(Provider, QIODevice*, QString type = "") -> QString;
}; };
struct ModpackAuthor { struct ModpackAuthor {

View File

@ -0,0 +1,81 @@
#include "HashUtils.h"
#include <QDebug>
#include <QFile>
#include "FileSystem.h"
#include <MurmurHash2.h>
namespace Hashing {
// File-local capabilities helper; used below to look up the Modrinth hash type and to hash an opened file.
static ModPlatform::ProviderCapabilities ProviderCaps;
/**
 * Builds the hasher task matching the given mod platform.
 *
 * @param file_path Path of the file the returned task will hash.
 * @param provider  Platform whose hashing scheme should be used.
 * @return A Modrinth or Flame hasher, or nullptr (after logging) when the
 *         provider is neither of those.
 */
Hasher::Ptr createHasher(QString file_path, ModPlatform::Provider provider)
{
    if (provider == ModPlatform::Provider::MODRINTH)
        return createModrinthHasher(file_path);

    if (provider == ModPlatform::Provider::FLAME)
        return createFlameHasher(file_path);

    // Neither known platform matched.
    qCritical() << "[Hashing]"
                << "Unrecognized mod platform!";
    return nullptr;
}
/**
 * Creates a ModrinthHasher for the given file.
 *
 * @param file_path Path of the file the task will hash.
 * @return The newly allocated hasher wrapped in a Hasher::Ptr.
 */
Hasher::Ptr createModrinthHasher(QString file_path)
{
    Hasher::Ptr hasher = new ModrinthHasher(file_path);
    return hasher;
}
/**
 * Creates a FlameHasher for the given file.
 *
 * @param file_path Path of the file the task will hash.
 * @return The newly allocated hasher wrapped in a Hasher::Ptr.
 */
Hasher::Ptr createFlameHasher(QString file_path)
{
    Hasher::Ptr hasher = new FlameHasher(file_path);
    return hasher;
}
void ModrinthHasher::executeTask()
{
QFile file(m_path);
try {
file.open(QFile::ReadOnly);
} catch (FS::FileSystemException& e) {
qCritical() << QString("Failed to open JAR file in %1").arg(m_path);
qCritical() << QString("Reason: ") << e.cause();
emitFailed("Failed to open file for hashing.");
return;
}
auto hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first();
m_hash = ProviderCaps.hash(ModPlatform::Provider::MODRINTH, &file, hash_type);
file.close();
if (m_hash.isEmpty()) {
emitFailed("Empty hash!");
} else {
emitSucceeded();
}
}
void FlameHasher::executeTask()
{
// CF-specific
auto should_filter_out = [](char c) { return (c == 9 || c == 10 || c == 13 || c == 32); };
std::ifstream file_stream(m_path.toStdString(), std::ifstream::binary);
// TODO: This is very heavy work, but apparently QtConcurrent can't use move semantics, so we can't boop this to another thread.
// How do we make this non-blocking then?
m_hash = QString::number(MurmurHash2(std::move(file_stream), 4 * MiB, should_filter_out));
if (m_hash.isEmpty()) {
emitFailed("Empty hash!");
} else {
emitSucceeded();
}
}
} // namespace Hashing

View File

@ -0,0 +1,47 @@
#pragma once
#include <QString>
#include "modplatform/ModIndex.h"
#include "tasks/Task.h"
namespace Hashing {
/**
 * Abstract task that hashes a single file.
 *
 * Subclasses implement executeTask() and store their result in m_hash;
 * callers read it back through getResult().
 */
class Hasher : public Task {
   public:
    using Ptr = shared_qobject_ptr<Hasher>;

    /** @param file_path Path of the file to hash; kept in m_path. */
    Hasher(QString file_path) : m_path(std::move(file_path)) {}

    /** Platform-specific hashing, provided by each subclass. */
    void executeTask() override = 0;

    /* We can't really abort this task, but we can say we aborted and finish our thing quickly :) */
    bool abort() override { return true; }

    /** @return The stored hash string (m_hash). */
    QString getResult() const { return m_hash; }

    /** @return The path this hasher was constructed with. */
    QString getPath() const { return m_path; }

   protected:
    QString m_hash;
    QString m_path;
};
/** Hasher whose executeTask() produces the hash used for the Flame platform. */
class FlameHasher : public Hasher {
   public:
    /** @param file_path Path of the file to hash; also used to label the object name. */
    FlameHasher(QString file_path) : Hasher(file_path)
    {
        const QString label = QString("FlameHasher: %1").arg(file_path);
        setObjectName(label);
    }

    void executeTask() override;
};
/** Hasher whose executeTask() produces the hash used for the Modrinth platform. */
class ModrinthHasher : public Hasher {
   public:
    /** @param file_path Path of the file to hash; also used to label the object name. */
    ModrinthHasher(QString file_path) : Hasher(file_path)
    {
        const QString label = QString("ModrinthHasher: %1").arg(file_path);
        setObjectName(label);
    }

    void executeTask() override;
};
// Returns a hasher for file_path matching the given provider, or nullptr when the provider is not recognized.
Hasher::Ptr createHasher(QString file_path, ModPlatform::Provider provider);
// Returns a new FlameHasher for file_path.
Hasher::Ptr createFlameHasher(QString file_path);
// Returns a new ModrinthHasher for file_path.
Hasher::Ptr createModrinthHasher(QString file_path);
} // namespace Hashing

View File

@ -2,11 +2,14 @@
#include "ModrinthAPI.h" #include "ModrinthAPI.h"
#include "ModrinthPackIndex.h" #include "ModrinthPackIndex.h"
#include "FileSystem.h"
#include "Json.h" #include "Json.h"
#include "ModDownloadTask.h" #include "ModDownloadTask.h"
#include "modplatform/helpers/HashUtils.h"
#include "tasks/ConcurrentTask.h"
static ModrinthAPI api; static ModrinthAPI api;
static ModPlatform::ProviderCapabilities ProviderCaps; static ModPlatform::ProviderCapabilities ProviderCaps;
@ -32,6 +35,8 @@ void ModrinthCheckUpdate::executeTask()
// Create all hashes // Create all hashes
QStringList hashes; QStringList hashes;
auto best_hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first(); auto best_hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first();
ConcurrentTask hashing_task(this, "MakeModrinthHashesTask", 10);
for (auto* mod : m_mods) { for (auto* mod : m_mods) {
if (!mod->enabled()) { if (!mod->enabled()) {
emit checkFailed(mod, tr("Disabled mods won't be updated, to prevent mod duplication issues!")); emit checkFailed(mod, tr("Disabled mods won't be updated, to prevent mod duplication issues!"));
@ -44,24 +49,24 @@ void ModrinthCheckUpdate::executeTask()
// need to generate a new hash if the current one is innadequate // need to generate a new hash if the current one is innadequate
// (though it will rarely happen, if at all) // (though it will rarely happen, if at all)
if (mod->metadata()->hash_format != best_hash_type) { if (mod->metadata()->hash_format != best_hash_type) {
QByteArray jar_data; auto hash_task = Hashing::createModrinthHasher(mod->fileinfo().absoluteFilePath());
connect(hash_task.get(), &Task::succeeded, [&] {
try { QString hash (hash_task->getResult());
jar_data = FS::read(mod->fileinfo().absoluteFilePath()); hashes.append(hash);
} catch (FS::FileSystemException& e) { mappings.insert(hash, mod);
qCritical() << QString("Failed to open / read JAR file of %1").arg(mod->name()); });
qCritical() << QString("Reason: ") << e.cause(); connect(hash_task.get(), &Task::failed, [this, hash_task] { failed("Failed to generate hash"); });
hashing_task.addTask(hash_task);
failed(e.what()); } else {
return;
}
hash = QString(ProviderCaps.hash(ModPlatform::Provider::MODRINTH, jar_data, best_hash_type).toHex());
}
hashes.append(hash); hashes.append(hash);
mappings.insert(hash, mod); mappings.insert(hash, mod);
} }
}
QEventLoop loop;
connect(&hashing_task, &Task::finished, [&loop]{ loop.quit(); });
hashing_task.start();
loop.exec();
auto* response = new QByteArray(); auto* response = new QByteArray();
auto job = api.latestVersions(hashes, best_hash_type, m_game_versions, m_loaders, response); auto job = api.latestVersions(hashes, best_hash_type, m_game_versions, m_loaders, response);

View File

@ -1,10 +1,11 @@
#include "ConcurrentTask.h" #include "ConcurrentTask.h"
#include <QDebug> #include <QDebug>
#include <QCoreApplication>
ConcurrentTask::ConcurrentTask(QObject* parent, QString task_name, int max_concurrent) ConcurrentTask::ConcurrentTask(QObject* parent, QString task_name, int max_concurrent)
: Task(parent), m_name(task_name), m_total_max_size(max_concurrent) : Task(parent), m_name(task_name), m_total_max_size(max_concurrent)
{} { setObjectName(task_name); }
ConcurrentTask::~ConcurrentTask() ConcurrentTask::~ConcurrentTask()
{ {
@ -36,8 +37,9 @@ void ConcurrentTask::executeTask()
{ {
m_total_size = m_queue.size(); m_total_size = m_queue.size();
for (int i = 0; i < m_total_max_size; i++) for (int i = 0; i < m_total_max_size; i++) {
startNext(); QMetaObject::invokeMethod(this, &ConcurrentTask::startNext, Qt::QueuedConnection);
}
} }
bool ConcurrentTask::abort() bool ConcurrentTask::abort()
@ -91,6 +93,8 @@ void ConcurrentTask::startNext()
setStepStatus(next->isMultiStep() ? next->getStepStatus() : next->getStatus()); setStepStatus(next->isMultiStep() ? next->getStepStatus() : next->getStatus());
updateState(); updateState();
QCoreApplication::processEvents();
next->start(); next->start();
} }

View File

@ -270,6 +270,10 @@ auto ModUpdateDialog::ensureMetadata() -> bool
connect(modrinth_task, &EnsureMetadataTask::metadataFailed, [this, &should_try_others](Mod* candidate) { connect(modrinth_task, &EnsureMetadataTask::metadataFailed, [this, &should_try_others](Mod* candidate) {
onMetadataFailed(candidate, should_try_others.find(candidate->internal_id()).value(), ModPlatform::Provider::MODRINTH); onMetadataFailed(candidate, should_try_others.find(candidate->internal_id()).value(), ModPlatform::Provider::MODRINTH);
}); });
if (modrinth_task->getHashingTask())
seq.addTask(modrinth_task->getHashingTask());
seq.addTask(modrinth_task); seq.addTask(modrinth_task);
} }
@ -279,6 +283,10 @@ auto ModUpdateDialog::ensureMetadata() -> bool
connect(flame_task, &EnsureMetadataTask::metadataFailed, [this, &should_try_others](Mod* candidate) { connect(flame_task, &EnsureMetadataTask::metadataFailed, [this, &should_try_others](Mod* candidate) {
onMetadataFailed(candidate, should_try_others.find(candidate->internal_id()).value(), ModPlatform::Provider::FLAME); onMetadataFailed(candidate, should_try_others.find(candidate->internal_id()).value(), ModPlatform::Provider::FLAME);
}); });
if (flame_task->getHashingTask())
seq.addTask(flame_task->getHashingTask());
seq.addTask(flame_task); seq.addTask(flame_task);
} }

View File

@ -1,86 +1,110 @@
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public // MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code. // domain. The author hereby disclaims copyright to this source code.
//
// Note - This code makes a few assumptions about how your machine behaves - // This was modified as to possibilitate it's usage incrementally.
// Those modifications are also placed in the public domain, and the author of
// 1. We can read a 4-byte value from any address without crashing // such modifications hereby disclaims copyright to this source code.
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
#include "MurmurHash2.h" #include "MurmurHash2.h"
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed )
{
// 'm' and 'r' are mixing constants generated offline. // 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well. // They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995; const uint32_t m = 0x5bd1e995;
const int r = 24; const int r = 24;
// Initialize the hash to a 'random' value uint32_t MurmurHash2(std::ifstream&& file_stream, std::size_t buffer_size, std::function<bool(char)> filter_out)
{
auto* buffer = new char[buffer_size];
char data[4];
uint32_t h = seed ^ len; int read = 0;
uint32_t size = 0;
// We need the size without the filtered out characters before actually calculating the hash,
// to setup the initial value for the hash.
do {
file_stream.read(buffer, buffer_size);
read = file_stream.gcount();
for (int i = 0; i < read; i++) {
if (!filter_out(buffer[i]))
size += 1;
}
} while (!file_stream.eof());
file_stream.clear();
file_stream.seekg(0, file_stream.beg);
int index = 0;
// This forces a seed of 1.
IncrementalHashInfo info{ (uint32_t)1 ^ size, (uint32_t)size };
do {
file_stream.read(buffer, buffer_size);
read = file_stream.gcount();
for (int i = 0; i < read; i++) {
char c = buffer[i];
if (filter_out(c))
continue;
data[index] = c;
index = (index + 1) % 4;
// Mix 4 bytes at a time into the hash // Mix 4 bytes at a time into the hash
const auto* data = (const unsigned char*) key; if (index == 0)
while(len >= 4) FourBytes_MurmurHash2((unsigned char*)&data, info);
}
} while (!file_stream.eof());
// Do one last bit shuffle in the hash
FourBytes_MurmurHash2((unsigned char*)&data, info);
delete[] buffer;
file_stream.close();
return info.h;
}
void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev)
{ {
if (prev.len >= 4) {
// Not the final mix
uint32_t k = *(uint32_t*)data; uint32_t k = *(uint32_t*)data;
k *= m; k *= m;
k ^= k >> r; k ^= k >> r;
k *= m; k *= m;
h *= m; prev.h *= m;
h ^= k; prev.h ^= k;
data += 4*sizeof(char); prev.len -= 4;
len -= 4; } else {
} // The final mix
// Handle the last few bytes of the input array // Handle the last few bytes of the input array
switch (prev.len) {
switch(len) case 3:
{ prev.h ^= data[2] << 16;
case 3: h ^= data[2] << 16; case 2:
case 2: h ^= data[1] << 8; prev.h ^= data[1] << 8;
case 1: h ^= data[0]; case 1:
h *= m; prev.h ^= data[0];
prev.h *= m;
}; };
// Do a few final mixes of the hash to ensure the last few // Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated. // bytes are well-incorporated.
h ^= h >> 13; prev.h ^= prev.h >> 13;
h *= m; prev.h *= m;
h ^= h >> 15; prev.h ^= prev.h >> 15;
return h; prev.len = 0;
}
} }
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------

View File

@ -1,30 +1,33 @@
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public // The original MurmurHash2 was written by Austin Appleby, and is placed in the
// domain. The author hereby disclaims copyright to this source code. // public domain. The author hereby disclaims copyright to this source code.
//
// This was modified to make it usable incrementally.
// Those modifications are also placed in the public domain, and the author of
// such modifications hereby disclaims copyright to this source code.
#pragma once #pragma once
//----------------------------------------------------------------------------- #include <cstdint>
// Platform-specific functions and macros #include <fstream>
// Microsoft Visual Studio #include <functional>
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed = 1 ); #define KiB 1024
#define MiB 1024*KiB
uint32_t MurmurHash2(
std::ifstream&& file_stream,
std::size_t buffer_size = 4*MiB,
std::function<bool(char)> filter_out = [](char) { return false; });
struct IncrementalHashInfo {
uint32_t h;
uint32_t len;
};
void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev);
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------