diff --git a/launcher/CMakeLists.txt b/launcher/CMakeLists.txt index cff07b4b3..1a5579c83 100644 --- a/launcher/CMakeLists.txt +++ b/launcher/CMakeLists.txt @@ -494,6 +494,8 @@ set(API_SOURCES modplatform/modrinth/ModrinthAPI.cpp modplatform/helpers/NetworkModAPI.h modplatform/helpers/NetworkModAPI.cpp + modplatform/helpers/HashUtils.h + modplatform/helpers/HashUtils.cpp ) set(FTB_SOURCES diff --git a/launcher/modplatform/EnsureMetadataTask.cpp b/launcher/modplatform/EnsureMetadataTask.cpp index 60c54c4ef..42137398d 100644 --- a/launcher/modplatform/EnsureMetadataTask.cpp +++ b/launcher/modplatform/EnsureMetadataTask.cpp @@ -3,81 +3,73 @@ #include #include -#include "FileSystem.h" #include "Json.h" + #include "minecraft/mod/Mod.h" #include "minecraft/mod/tasks/LocalModUpdateTask.h" + #include "modplatform/flame/FlameAPI.h" #include "modplatform/flame/FlameModIndex.h" #include "modplatform/modrinth/ModrinthAPI.h" #include "modplatform/modrinth/ModrinthPackIndex.h" + #include "net/NetJob.h" -#include "tasks/MultipleOptionsTask.h" static ModPlatform::ProviderCapabilities ProviderCaps; static ModrinthAPI modrinth_api; static FlameAPI flame_api; -EnsureMetadataTask::EnsureMetadataTask(Mod* mod, QDir dir, ModPlatform::Provider prov) : Task(nullptr), m_index_dir(dir), m_provider(prov) +EnsureMetadataTask::EnsureMetadataTask(Mod* mod, QDir dir, ModPlatform::Provider prov) + : Task(nullptr), m_index_dir(dir), m_provider(prov), m_hashing_task(nullptr), m_current_task(nullptr) { - auto hash = getHash(mod); - if (hash.isEmpty()) - emitFail(mod); - else - m_mods.insert(hash, mod); + auto hash_task = createNewHash(mod); + if (!hash_task) + return; + connect(hash_task.get(), &Task::succeeded, [this, hash_task, mod] { m_mods.insert(hash_task->getResult(), mod); }); + connect(hash_task.get(), &Task::failed, [this, hash_task, mod] { emitFail(mod, "", RemoveFromList::No); }); + hash_task->start(); } EnsureMetadataTask::EnsureMetadataTask(QList& mods, QDir dir, ModPlatform::Provider prov) - : Task(nullptr), m_index_dir(dir), m_provider(prov) + : Task(nullptr), m_index_dir(dir), m_provider(prov), m_current_task(nullptr) { + m_hashing_task = new ConcurrentTask(this, "MakeHashesTask", 10); for (auto* mod : mods) { - if (!mod->valid()) { - emitFail(mod); + auto hash_task = createNewHash(mod); + if (!hash_task) continue; - } - - auto hash = getHash(mod); - if (hash.isEmpty()) { - emitFail(mod); - continue; - } - - m_mods.insert(hash, mod); + connect(hash_task.get(), &Task::succeeded, [this, hash_task, mod] { m_mods.insert(hash_task->getResult(), mod); }); + connect(hash_task.get(), &Task::failed, [this, hash_task, mod] { emitFail(mod, "", RemoveFromList::No); }); + m_hashing_task->addTask(hash_task); } } -QString EnsureMetadataTask::getHash(Mod* mod) +Hashing::Hasher::Ptr EnsureMetadataTask::createNewHash(Mod* mod) { - /* Here we create a mapping hash -> mod, because we need that relationship to parse the API routes */ - QByteArray jar_data; - try { - jar_data = FS::read(mod->fileinfo().absoluteFilePath()); - } catch (FS::FileSystemException& e) { - qCritical() << QString("Failed to open / read JAR file of %1").arg(mod->name()); - qCritical() << QString("Reason: ") << e.cause(); + if (!mod || !mod->valid() || mod->type() == Mod::MOD_FOLDER) + return nullptr; - return {}; + return Hashing::createHasher(mod->fileinfo().absoluteFilePath(), m_provider); +} + +QString EnsureMetadataTask::getExistingHash(Mod* mod) +{ + // Check for already computed hashes + // (linear on the number of mods vs. linear on the size of the mod's JAR) + auto it = m_mods.keyValueBegin(); + while (it != m_mods.keyValueEnd()) { + if ((*it).second == mod) + break; + it++; } - switch (m_provider) { - case ModPlatform::Provider::MODRINTH: { - auto hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first(); - - return QString(ProviderCaps.hash(ModPlatform::Provider::MODRINTH, jar_data, hash_type).toHex()); - } - case ModPlatform::Provider::FLAME: { - QByteArray jar_data_treated; - for (char c : jar_data) { - // CF-specific - if (!(c == 9 || c == 10 || c == 13 || c == 32)) - jar_data_treated.push_back(c); - } - - return QString::number(MurmurHash2(jar_data_treated, jar_data_treated.length())); - } + // We already have the hash computed + if (it != m_mods.keyValueEnd()) { + return (*it).first; } + // No existing hash return {}; } @@ -127,11 +119,9 @@ void EnsureMetadataTask::executeTask() } auto invalidade_leftover = [this] { - QMutableHashIterator mods_iter(m_mods); - while (mods_iter.hasNext()) { - auto mod = mods_iter.next(); - emitFail(mod.value()); - } + for (auto mod = m_mods.constBegin(); mod != m_mods.constEnd(); mod++) + emitFail(mod.value(), mod.key(), RemoveFromList::No); + m_mods.clear(); emitSucceeded(); }; @@ -178,20 +168,44 @@ void EnsureMetadataTask::executeTask() version_task->start(); } -void EnsureMetadataTask::emitReady(Mod* m) +void EnsureMetadataTask::emitReady(Mod* m, QString key, RemoveFromList remove) { + if (!m) { + qCritical() << "Tried to mark a null mod as ready."; + if (!key.isEmpty()) + m_mods.remove(key); + + return; + } + qDebug() << QString("Generated metadata for %1").arg(m->name()); emit metadataReady(m); - m_mods.remove(getHash(m)); + if (remove == RemoveFromList::Yes) { + if (key.isEmpty()) + key = getExistingHash(m); + m_mods.remove(key); + } } -void EnsureMetadataTask::emitFail(Mod* m) +void EnsureMetadataTask::emitFail(Mod* m, QString key, RemoveFromList remove) { + if (!m) { + qCritical() << "Tried to mark a null mod as failed."; + if (!key.isEmpty()) + m_mods.remove(key); + + return; + } + qDebug() << QString("Failed to generate metadata for %1").arg(m->name()); emit metadataFailed(m); - m_mods.remove(getHash(m)); + if (remove == RemoveFromList::Yes) { + if (key.isEmpty()) + key = getExistingHash(m); + m_mods.remove(key); + } } // Modrinth diff --git a/launcher/modplatform/EnsureMetadataTask.h b/launcher/modplatform/EnsureMetadataTask.h index 79db69760..a8b0851ee 100644 --- a/launcher/modplatform/EnsureMetadataTask.h +++ b/launcher/modplatform/EnsureMetadataTask.h @@ -1,12 +1,14 @@ #pragma once #include "ModIndex.h" -#include "tasks/SequentialTask.h" #include "net/NetJob.h" +#include "modplatform/helpers/HashUtils.h" + +#include "tasks/ConcurrentTask.h" + class Mod; class QDir; -class MultipleOptionsTask; class EnsureMetadataTask : public Task { Q_OBJECT @@ -17,6 +19,8 @@ class EnsureMetadataTask : public Task { ~EnsureMetadataTask() = default; + Task::Ptr getHashingTask() { return m_hashing_task; } + public slots: bool abort() override; protected slots: @@ -31,10 +35,16 @@ class EnsureMetadataTask : public Task { auto flameProjectsTask() -> NetJob::Ptr; // Helpers - void emitReady(Mod*); - void emitFail(Mod*); + enum class RemoveFromList { + Yes, + No + }; + void emitReady(Mod*, QString key = {}, RemoveFromList = RemoveFromList::Yes); + void emitFail(Mod*, QString key = {}, RemoveFromList = RemoveFromList::Yes); - auto getHash(Mod*) -> QString; + // Hashes and stuff + auto createNewHash(Mod*) -> Hashing::Hasher::Ptr; + auto getExistingHash(Mod*) -> QString; private slots: void modrinthCallback(ModPlatform::IndexedPack& pack, ModPlatform::IndexedVersion& ver, Mod*); @@ -50,5 +60,6 @@ class EnsureMetadataTask : public Task { ModPlatform::Provider m_provider; QHash m_temp_versions; + ConcurrentTask* m_hashing_task; NetJob* m_current_task; }; diff --git a/launcher/modplatform/ModIndex.cpp b/launcher/modplatform/ModIndex.cpp index 3c4b7887f..34fd9f307 100644 --- a/launcher/modplatform/ModIndex.cpp +++ b/launcher/modplatform/ModIndex.cpp @@ -19,6 +19,8 @@ #include "modplatform/ModIndex.h" #include +#include +#include namespace ModPlatform { @@ -53,34 +55,26 @@ auto ProviderCapabilities::hashType(Provider p) -> QStringList } return {}; } -auto ProviderCapabilities::hash(Provider p, QByteArray& data, QString type) -> QByteArray + +auto ProviderCapabilities::hash(Provider p, QIODevice* device, QString type) -> QString { + QCryptographicHash::Algorithm algo = QCryptographicHash::Sha1; switch (p) { case Provider::MODRINTH: { - // NOTE: Data is the result of reading the entire JAR file! - - // If 'type' was specified, we use that - if (!type.isEmpty() && hashType(p).contains(type)) { - if (type == "sha512") - return QCryptographicHash::hash(data, QCryptographicHash::Sha512); - else if (type == "sha1") - return QCryptographicHash::hash(data, QCryptographicHash::Sha1); - } - - return QCryptographicHash::hash(data, QCryptographicHash::Sha512); + algo = (type == "sha1") ? QCryptographicHash::Sha1 : QCryptographicHash::Sha512; + break; } case Provider::FLAME: - // If 'type' was specified, we use that - if (!type.isEmpty() && hashType(p).contains(type)) { - if(type == "sha1") - return QCryptographicHash::hash(data, QCryptographicHash::Sha1); - else if (type == "md5") - return QCryptographicHash::hash(data, QCryptographicHash::Md5); - } - + algo = (type == "sha1") ? QCryptographicHash::Sha1 : QCryptographicHash::Md5; break; } - return {}; + + QCryptographicHash hash(algo); + if(!hash.addData(device)) + qCritical() << "Failed to read JAR to create hash!"; + + Q_ASSERT(hash.result().length() == hash.hashLength(algo)); + return { hash.result().toHex() }; } } // namespace ModPlatform diff --git a/launcher/modplatform/ModIndex.h b/launcher/modplatform/ModIndex.h index dc297d038..89fe1c5cc 100644 --- a/launcher/modplatform/ModIndex.h +++ b/launcher/modplatform/ModIndex.h @@ -24,6 +24,8 @@ #include #include +class QIODevice; + namespace ModPlatform { enum class Provider { @@ -36,7 +38,7 @@ class ProviderCapabilities { auto name(Provider) -> const char*; auto readableName(Provider) -> QString; auto hashType(Provider) -> QStringList; - auto hash(Provider, QByteArray&, QString type = "") -> QByteArray; + auto hash(Provider, QIODevice*, QString type = "") -> QString; }; struct ModpackAuthor { diff --git a/launcher/modplatform/helpers/HashUtils.cpp b/launcher/modplatform/helpers/HashUtils.cpp new file mode 100644 index 000000000..a7bbaba50 --- /dev/null +++ b/launcher/modplatform/helpers/HashUtils.cpp @@ -0,0 +1,81 @@ +#include "HashUtils.h" + +#include +#include + +#include "FileSystem.h" + +#include + +namespace Hashing { + +static ModPlatform::ProviderCapabilities ProviderCaps; + +Hasher::Ptr createHasher(QString file_path, ModPlatform::Provider provider) +{ + switch (provider) { + case ModPlatform::Provider::MODRINTH: + return createModrinthHasher(file_path); + case ModPlatform::Provider::FLAME: + return createFlameHasher(file_path); + default: + qCritical() << "[Hashing]" + << "Unrecognized mod platform!"; + return nullptr; + } +} + +Hasher::Ptr createModrinthHasher(QString file_path) +{ + return new ModrinthHasher(file_path); +} + +Hasher::Ptr createFlameHasher(QString file_path) +{ + return new FlameHasher(file_path); +} + +void ModrinthHasher::executeTask() +{ + QFile file(m_path); + + try { + file.open(QFile::ReadOnly); + } catch (FS::FileSystemException& e) { + qCritical() << QString("Failed to open JAR file in %1").arg(m_path); + qCritical() << QString("Reason: ") << e.cause(); + + emitFailed("Failed to open file for hashing."); + return; + } + + auto hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first(); + m_hash = ProviderCaps.hash(ModPlatform::Provider::MODRINTH, &file, hash_type); + + file.close(); + + if (m_hash.isEmpty()) { + emitFailed("Empty hash!"); + } else { + emitSucceeded(); + } +} + +void FlameHasher::executeTask() +{ + // CF-specific + auto should_filter_out = [](char c) { return (c == 9 || c == 10 || c == 13 || c == 32); }; + + std::ifstream file_stream(m_path.toStdString(), std::ifstream::binary); + // TODO: This is very heavy work, but apparently QtConcurrent can't use move semantics, so we can't boop this to another thread. + // How do we make this non-blocking then? + m_hash = QString::number(MurmurHash2(std::move(file_stream), 4 * MiB, should_filter_out)); + + if (m_hash.isEmpty()) { + emitFailed("Empty hash!"); + } else { + emitSucceeded(); + } +} + +} // namespace Hashing diff --git a/launcher/modplatform/helpers/HashUtils.h b/launcher/modplatform/helpers/HashUtils.h new file mode 100644 index 000000000..38fddf039 --- /dev/null +++ b/launcher/modplatform/helpers/HashUtils.h @@ -0,0 +1,47 @@ +#pragma once + +#include + +#include "modplatform/ModIndex.h" +#include "tasks/Task.h" + +namespace Hashing { + +class Hasher : public Task { + public: + using Ptr = shared_qobject_ptr; + + Hasher(QString file_path) : m_path(std::move(file_path)) {} + + /* We can't really abort this task, but we can say we aborted and finish our thing quickly :) */ + bool abort() override { return true; } + + void executeTask() override = 0; + + QString getResult() const { return m_hash; }; + QString getPath() const { return m_path; }; + + protected: + QString m_hash; + QString m_path; +}; + +class FlameHasher : public Hasher { + public: + FlameHasher(QString file_path) : Hasher(file_path) { setObjectName(QString("FlameHasher: %1").arg(file_path)); } + + void executeTask() override; +}; + +class ModrinthHasher : public Hasher { + public: + ModrinthHasher(QString file_path) : Hasher(file_path) { setObjectName(QString("ModrinthHasher: %1").arg(file_path)); } + + void executeTask() override; +}; + +Hasher::Ptr createHasher(QString file_path, ModPlatform::Provider provider); +Hasher::Ptr createFlameHasher(QString file_path); +Hasher::Ptr createModrinthHasher(QString file_path); + +} // namespace Hashing diff --git a/launcher/modplatform/modrinth/ModrinthCheckUpdate.cpp b/launcher/modplatform/modrinth/ModrinthCheckUpdate.cpp index 79d8edf75..e2d275478 100644 --- a/launcher/modplatform/modrinth/ModrinthCheckUpdate.cpp +++ b/launcher/modplatform/modrinth/ModrinthCheckUpdate.cpp @@ -2,11 +2,14 @@ #include "ModrinthAPI.h" #include "ModrinthPackIndex.h" -#include "FileSystem.h" #include "Json.h" #include "ModDownloadTask.h" +#include "modplatform/helpers/HashUtils.h" + +#include "tasks/ConcurrentTask.h" + static ModrinthAPI api; static ModPlatform::ProviderCapabilities ProviderCaps; @@ -32,6 +35,8 @@ void ModrinthCheckUpdate::executeTask() // Create all hashes QStringList hashes; auto best_hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first(); + + ConcurrentTask hashing_task(this, "MakeModrinthHashesTask", 10); for (auto* mod : m_mods) { if (!mod->enabled()) { emit checkFailed(mod, tr("Disabled mods won't be updated, to prevent mod duplication issues!")); @@ -44,25 +49,25 @@ void ModrinthCheckUpdate::executeTask() // need to generate a new hash if the current one is innadequate // (though it will rarely happen, if at all) if (mod->metadata()->hash_format != best_hash_type) { - QByteArray jar_data; - - try { - jar_data = FS::read(mod->fileinfo().absoluteFilePath()); - } catch (FS::FileSystemException& e) { - qCritical() << QString("Failed to open / read JAR file of %1").arg(mod->name()); - qCritical() << QString("Reason: ") << e.cause(); - - failed(e.what()); - return; - } - - hash = QString(ProviderCaps.hash(ModPlatform::Provider::MODRINTH, jar_data, best_hash_type).toHex()); + auto hash_task = Hashing::createModrinthHasher(mod->fileinfo().absoluteFilePath()); + connect(hash_task.get(), &Task::succeeded, [&] { + QString hash (hash_task->getResult()); + hashes.append(hash); + mappings.insert(hash, mod); + }); + connect(hash_task.get(), &Task::failed, [this, hash_task] { failed("Failed to generate hash"); }); + hashing_task.addTask(hash_task); + } else { + hashes.append(hash); + mappings.insert(hash, mod); } - - hashes.append(hash); - mappings.insert(hash, mod); } + QEventLoop loop; + connect(&hashing_task, &Task::finished, [&loop]{ loop.quit(); }); + hashing_task.start(); + loop.exec(); + auto* response = new QByteArray(); auto job = api.latestVersions(hashes, best_hash_type, m_game_versions, m_loaders, response); diff --git a/launcher/tasks/ConcurrentTask.cpp b/launcher/tasks/ConcurrentTask.cpp index b88cfb131..ab7cbd036 100644 --- a/launcher/tasks/ConcurrentTask.cpp +++ b/launcher/tasks/ConcurrentTask.cpp @@ -1,10 +1,11 @@ #include "ConcurrentTask.h" #include +#include ConcurrentTask::ConcurrentTask(QObject* parent, QString task_name, int max_concurrent) : Task(parent), m_name(task_name), m_total_max_size(max_concurrent) -{} +{ setObjectName(task_name); } ConcurrentTask::~ConcurrentTask() { @@ -36,8 +37,9 @@ void ConcurrentTask::executeTask() { m_total_size = m_queue.size(); - for (int i = 0; i < m_total_max_size; i++) - startNext(); + for (int i = 0; i < m_total_max_size; i++) { + QMetaObject::invokeMethod(this, &ConcurrentTask::startNext, Qt::QueuedConnection); + } } bool ConcurrentTask::abort() @@ -91,6 +93,8 @@ void ConcurrentTask::startNext() setStepStatus(next->isMultiStep() ? next->getStepStatus() : next->getStatus()); updateState(); + QCoreApplication::processEvents(); + next->start(); } diff --git a/launcher/ui/dialogs/ModUpdateDialog.cpp b/launcher/ui/dialogs/ModUpdateDialog.cpp index d73c8ebb6..936fd99d2 100644 --- a/launcher/ui/dialogs/ModUpdateDialog.cpp +++ b/launcher/ui/dialogs/ModUpdateDialog.cpp @@ -270,6 +270,10 @@ auto ModUpdateDialog::ensureMetadata() -> bool connect(modrinth_task, &EnsureMetadataTask::metadataFailed, [this, &should_try_others](Mod* candidate) { onMetadataFailed(candidate, should_try_others.find(candidate->internal_id()).value(), ModPlatform::Provider::MODRINTH); }); + + if (modrinth_task->getHashingTask()) + seq.addTask(modrinth_task->getHashingTask()); + seq.addTask(modrinth_task); } @@ -279,6 +283,10 @@ auto ModUpdateDialog::ensureMetadata() -> bool connect(flame_task, &EnsureMetadataTask::metadataFailed, [this, &should_try_others](Mod* candidate) { onMetadataFailed(candidate, should_try_others.find(candidate->internal_id()).value(), ModPlatform::Provider::FLAME); }); + + if (flame_task->getHashingTask()) + seq.addTask(flame_task->getHashingTask()); + seq.addTask(flame_task); } diff --git a/libraries/murmur2/src/MurmurHash2.cpp b/libraries/murmur2/src/MurmurHash2.cpp index 3e52e6d15..b625efb1c 100644 --- a/libraries/murmur2/src/MurmurHash2.cpp +++ b/libraries/murmur2/src/MurmurHash2.cpp @@ -1,86 +1,110 @@ //----------------------------------------------------------------------------- // MurmurHash2 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code. - -// Note - This code makes a few assumptions about how your machine behaves - - -// 1. We can read a 4-byte value from any address without crashing -// 2. sizeof(int) == 4 - -// And it has a few limitations - - -// 1. It will not work incrementally. -// 2. It will not produce the same results on little-endian and big-endian -// machines. +// +// This was modified as to possibilitate it's usage incrementally. +// Those modifications are also placed in the public domain, and the author of +// such modifications hereby disclaims copyright to this source code. #include "MurmurHash2.h" //----------------------------------------------------------------------------- -// Platform-specific functions and macros -// Microsoft Visual Studio +// 'm' and 'r' are mixing constants generated offline. +// They're not really 'magic', they just happen to work well. +const uint32_t m = 0x5bd1e995; +const int r = 24; -#if defined(_MSC_VER) - -#define BIG_CONSTANT(x) (x) - -// Other compilers - -#else // defined(_MSC_VER) - -#define BIG_CONSTANT(x) (x##LLU) - -#endif // !defined(_MSC_VER) - -//----------------------------------------------------------------------------- - -uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed ) +uint32_t MurmurHash2(std::ifstream&& file_stream, std::size_t buffer_size, std::function filter_out) { - // 'm' and 'r' are mixing constants generated offline. - // They're not really 'magic', they just happen to work well. + auto* buffer = new char[buffer_size]; + char data[4]; - const uint32_t m = 0x5bd1e995; - const int r = 24; + int read = 0; + uint32_t size = 0; - // Initialize the hash to a 'random' value + // We need the size without the filtered out characters before actually calculating the hash, + // to setup the initial value for the hash. + do { + file_stream.read(buffer, buffer_size); + read = file_stream.gcount(); + for (int i = 0; i < read; i++) { + if (!filter_out(buffer[i])) + size += 1; + } + } while (!file_stream.eof()); - uint32_t h = seed ^ len; + file_stream.clear(); + file_stream.seekg(0, file_stream.beg); - // Mix 4 bytes at a time into the hash - const auto* data = (const unsigned char*) key; - while(len >= 4) - { - uint32_t k = *(uint32_t*)data; + int index = 0; - k *= m; - k ^= k >> r; - k *= m; + // This forces a seed of 1. + IncrementalHashInfo info{ (uint32_t)1 ^ size, (uint32_t)size }; + do { + file_stream.read(buffer, buffer_size); + read = file_stream.gcount(); + for (int i = 0; i < read; i++) { + char c = buffer[i]; - h *= m; - h ^= k; + if (filter_out(c)) + continue; - data += 4*sizeof(char); - len -= 4; - } + data[index] = c; + index = (index + 1) % 4; - // Handle the last few bytes of the input array + // Mix 4 bytes at a time into the hash + if (index == 0) + FourBytes_MurmurHash2((unsigned char*)&data, info); + } + } while (!file_stream.eof()); - switch(len) - { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; - h *= m; - }; + // Do one last bit shuffle in the hash + FourBytes_MurmurHash2((unsigned char*)&data, info); - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. + delete[] buffer; - h ^= h >> 13; - h *= m; - h ^= h >> 15; + file_stream.close(); + return info.h; +} - return h; -} +void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev) +{ + if (prev.len >= 4) { + // Not the final mix + uint32_t k = *(uint32_t*)data; + + k *= m; + k ^= k >> r; + k *= m; + + prev.h *= m; + prev.h ^= k; + + prev.len -= 4; + } else { + // The final mix + + // Handle the last few bytes of the input array + switch (prev.len) { + case 3: + prev.h ^= data[2] << 16; + case 2: + prev.h ^= data[1] << 8; + case 1: + prev.h ^= data[0]; + prev.h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + prev.h ^= prev.h >> 13; + prev.h *= m; + prev.h ^= prev.h >> 15; + + prev.len = 0; + } +} //----------------------------------------------------------------------------- diff --git a/libraries/murmur2/src/MurmurHash2.h b/libraries/murmur2/src/MurmurHash2.h index c7b83bcad..dc2c9681d 100644 --- a/libraries/murmur2/src/MurmurHash2.h +++ b/libraries/murmur2/src/MurmurHash2.h @@ -1,30 +1,33 @@ //----------------------------------------------------------------------------- -// MurmurHash2 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. +// The original MurmurHash2 was written by Austin Appleby, and is placed in the +// public domain. The author hereby disclaims copyright to this source code. +// +// This was modified as to possibilitate it's usage incrementally. +// Those modifications are also placed in the public domain, and the author of +// such modifications hereby disclaims copyright to this source code. #pragma once -//----------------------------------------------------------------------------- -// Platform-specific functions and macros +#include +#include -// Microsoft Visual Studio - -#if defined(_MSC_VER) && (_MSC_VER < 1600) - -typedef unsigned char uint8_t; -typedef unsigned int uint32_t; -typedef unsigned __int64 uint64_t; - -// Other compilers - -#else // defined(_MSC_VER) - -#include - -#endif // !defined(_MSC_VER) +#include //----------------------------------------------------------------------------- -uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed = 1 ); +#define KiB 1024 +#define MiB 1024*KiB + +uint32_t MurmurHash2( + std::ifstream&& file_stream, + std::size_t buffer_size = 4*MiB, + std::function filter_out = [](char) { return false; }); + +struct IncrementalHashInfo { + uint32_t h; + uint32_t len; +}; + +void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev); //-----------------------------------------------------------------------------