star-hitran

Load line-by-line data from the HITRAN database
git clone git://git.meso-star.fr/star-hitran.git
Log | Files | Refs | README | LICENSE

commit f4377e10eb55475673778da0d76441a961741a85
parent 1a9b9a6db4d011d41dd40d57dba65a3f38cb339d
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Mon,  9 Mar 2026 17:16:01 +0100

Add a function that calculates the metadata signature

So that the caller can determine whether one set of metadata is
identical to another.

The signature returned depends solely on the data relating to the
molecules and their isotopes. It does not depend on their loading order.

Diffstat:
Msrc/shtr.h | 15+++++++++++++++
Msrc/shtr_isotope_metadata.c | 126+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/test_shtr_isotope_metadata.c | 98+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 239 insertions(+), 0 deletions(-)

diff --git a/src/shtr.h b/src/shtr.h
@@ -19,6 +19,7 @@
 #ifndef SHTR_H
 #define SHTR_H
 
+#include <rsys/hash.h>
 #include <rsys/rsys.h>
 
 #include <float.h>
@@ -42,6 +43,10 @@
   #define SHTR(Func) shtr_ ## Func
 #endif
 
+/* Maximum number of isotopes that a single molecule may define. Computing the
+ * signature of metadata fails if a molecule has more isotopes than that */
+#define SHTR_MAX_ISOTOPES_COUNT 12
+
 /* List of HITRAN species. They are classified as in the HITRAN database ,i.e.
  * their value corresponds to that of HITRAN. Note that they start at 1 rather
  * than zero and therefore do not strictly correspond to a C array index */
@@ -327,6 +332,16 @@ shtr_isotope_metadata_write
   (const struct shtr_isotope_metadata* metadata,
    FILE* stream);
 
+/* Compute a SHA256 signature of the metadata. It depends only on the data of
+ * the molecules and of their isotopes, not on the order in which they were
+ * loaded: two sets of metadata storing the same data have the same
+ * signature. Return RES_BAD_ARG if an argument is NULL or if a molecule has
+ * more than SHTR_MAX_ISOTOPES_COUNT isotopes */
+SHTR_API res_T
+shtr_isotope_metadata_compute_signature
+  (const struct shtr_isotope_metadata* metadata,
+   hash256_T signature);
+
 /*******************************************************************************
  * Lines API
  ******************************************************************************/
diff --git a/src/shtr_isotope_metadata.c b/src/shtr_isotope_metadata.c
@@ -629,6 +629,106 @@ error:
   goto exit;
 }
 
+/* qsort comparison function: sort the isotopes by ascending identifier */
+static int
+cmp_isotopes(const void* a, const void* b)
+{
+  const struct shtr_isotope* isotope0 = a;
+  const struct shtr_isotope* isotope1 = b;
+  ASSERT(a && b);
+  /* Compare the identifiers rather than subtracting them: a difference may
+   * overflow and then report a wrong order */
+  return (isotope0->id > isotope1->id) - (isotope0->id < isotope1->id);
+}
+
+/* Update the hash context with the isotope data, one member at a time */
+static INLINE void
+hash_isotope(const struct shtr_isotope* isotope, struct sha256_ctx* ctx)
+{
+  ASSERT(isotope && ctx);
+
+  #define HASH(Var) \
+    sha256_ctx_update(ctx, (const char*)(&isotope->Var), sizeof(isotope->Var))
+  HASH(abundance);
+  HASH(Q296K);
+  HASH(molar_mass);
+  HASH(gj);
+  HASH(id);
+  #undef HASH
+}
+
+/* Update the hash context with the name and the identifier of the molecule,
+ * then with its isotopes sorted by identifier. molecule_id is the index of
+ * the molecule in the list of loaded molecules. Return RES_BAD_ARG if the
+ * molecule has more than SHTR_MAX_ISOTOPES_COUNT isotopes */
+static INLINE res_T
+hash_molecule
+  (const struct shtr_isotope_metadata* metadata,
+   const int molecule_id,
+   struct sha256_ctx* ctx)
+{
+  const struct molecule* molecule = NULL;
+  const struct shtr_isotope* isotopes = NULL;
+  struct shtr_isotope isotopes_sorted[SHTR_MAX_ISOTOPES_COUNT] = {0};
+  size_t i = 0;
+  size_t nisotopes = 0;
+
+  ASSERT(metadata && molecule_id >= 0 && ctx);
+  ASSERT((size_t)molecule_id < darray_molecule_size_get(&metadata->molecules));
+
+  molecule = darray_molecule_cdata_get(&metadata->molecules) + molecule_id;
+
+  /* Check the isotope count at runtime, and not only through an assertion,
+   * since exceeding it would overflow the isotopes_sorted array */
+  nisotopes = molecule->isotopes_range[1] - molecule->isotopes_range[0];
+  if(nisotopes > SHTR_MAX_ISOTOPES_COUNT) return RES_BAD_ARG;
+
+  #define HASH(Bytes, Size) \
+    sha256_ctx_update(ctx, (const char*)(Bytes), (Size))
+  HASH(str_cget(&molecule->name), str_len(&molecule->name));
+  HASH(&molecule->id, sizeof(molecule->id));
+  #undef HASH
+
+  isotopes = darray_isotope_cdata_get(&metadata->isotopes)
+           + molecule->isotopes_range[0];
+
+  /* Sort molecular isotopes according to their identifier so that the
+   * molecule's hash is independent of the order in which its isotopes are
+   * loaded. */
+  FOR_EACH(i, 0, nisotopes) isotopes_sorted[i] = isotopes[i];
+  qsort(isotopes_sorted, nisotopes, sizeof(struct shtr_isotope), cmp_isotopes);
+
+  FOR_EACH(i, 0, nisotopes) hash_isotope(isotopes_sorted+i, ctx);
+  return RES_OK;
+}
+
+/* Update the hash context with all the loaded molecules. Return the error of
+ * hash_molecule if one of them cannot be hashed */
+static res_T
+hash_molecule_list
+  (const struct shtr_isotope_metadata* metadata,
+   struct sha256_ctx* ctx)
+{
+  size_t i = 0;
+  size_t nmolecules = 0;
+  res_T res = RES_OK;
+  ASSERT(metadata && ctx);
+
+  nmolecules = darray_molecule_size_get(&metadata->molecules);
+
+  /* Iterate over the molecules in order of their ID to ensure that their
+   * loading order does not affect the hash value of the list */
+  FOR_EACH(i, 0, SHTR_MAX_MOLECULE_COUNT) {
+    if(metadata->molid2idx[i] >= 0) {
+      res = hash_molecule(metadata, metadata->molid2idx[i], ctx);
+      if(res != RES_OK) break;
+      --nmolecules;
+      if(nmolecules == 0) break; /* All loaded molecules have been hashed */
+    }
+  }
+  return res;
+}
+
 static void
 release_isotope_metadata(ref_T* ref)
 {
@@ -885,3 +985,29 @@ exit:
 error:
   goto exit;
 }
+
+res_T
+shtr_isotope_metadata_compute_signature
+  (const struct shtr_isotope_metadata* metadata,
+   hash256_T signature)
+{
+  struct sha256_ctx ctx;
+  res_T res = RES_OK;
+
+  if(!metadata || !signature) {
+    res = RES_BAD_ARG;
+    goto error;
+  }
+
+  sha256_ctx_init(&ctx);
+  res = hash_molecule_list(metadata, &ctx);
+  if(res != RES_OK) goto error;
+
+  /* Write the signature only once all the molecules have been hashed */
+  sha256_ctx_finalize(&ctx, signature);
+
+exit:
+  return res;
+error:
+  goto exit;
+}
diff --git a/src/test_shtr_isotope_metadata.c b/src/test_shtr_isotope_metadata.c
@@ -81,6 +81,18 @@ molecule_print(FILE* fp, const struct shtr_molecule* molecule)
   }
 }
 
+static void
+molecule_print_reverse(FILE* fp, const struct shtr_molecule* molecule)
+{
+  size_t i;
+  CHK(fp && molecule);
+
+  fprintf(fp, " %s (%d)\n", molecule->name, molecule->id);
+  FOR_EACH_REVERSE(i, molecule->nisotopes, 0) { /* Last to first isotope */
+    isotope_print(fp, molecule->isotopes+i-1);
+  }
+}
+
 static int
 isotope_eq(const struct shtr_isotope* i0, const struct shtr_isotope* i1)
 {
@@ -310,6 +322,91 @@ check_equality
 }
 
 static void
+test_signature(struct shtr* shtr)
+{
+  struct shtr_isotope_metadata* mdata1 = NULL;
+  struct shtr_isotope_metadata* mdata2 = NULL;
+  struct shtr_isotope_metadata* mdata3 = NULL;
+  struct shtr_isotope_metadata* mdata4 = NULL;
+  struct shtr_isotope_metadata* mdata5 = NULL;
+  hash256_T signature1;
+  hash256_T signature2;
+  hash256_T signature3;
+  hash256_T signature4;
+  hash256_T signature5;
+  FILE* fp = NULL;
+
+  /* Setup the metadata of the H2O and the CO2 molecules */
+  CHK(fp = tmpfile());
+  fprintf(fp, "Molecule # Iso Abundance Q(296K) gj Molar Mass(g)\n");
+  molecule_print(fp, &H2O);
+  molecule_print(fp, &CO2);
+  rewind(fp);
+  CHK(shtr_isotope_metadata_load_stream(shtr, fp, NULL, &mdata1) == RES_OK);
+  CHK(fclose(fp) == 0);
+
+  /* Check the signature API */
+  CHK(shtr_isotope_metadata_compute_signature(NULL, signature1) == RES_BAD_ARG);
+  CHK(shtr_isotope_metadata_compute_signature(mdata1, NULL) == RES_BAD_ARG);
+  CHK(shtr_isotope_metadata_compute_signature(mdata1, signature1) == RES_OK);
+
+  /* Setup the same metadata as the first one, in the same order */
+  CHK(fp = tmpfile());
+  fprintf(fp, "Molecule # Dummy comment\n");
+  molecule_print(fp, &H2O);
+  molecule_print(fp, &CO2);
+  rewind(fp);
+  CHK(shtr_isotope_metadata_load_stream(shtr, fp, NULL, &mdata2) == RES_OK);
+  CHK(fclose(fp) == 0);
+
+  CHK(shtr_isotope_metadata_compute_signature(mdata2, signature2) == RES_OK);
+  CHK(hash256_eq(signature2, signature1) != 0);
+
+  /* Configure metadata for H2O and CO2 molecules but in a different order.
+   * This must not change the signature */
+  CHK(fp = tmpfile());
+  fprintf(fp, "Molecule # Iso Abundance Q(296K) gj Molar Mass(g)\n");
+  molecule_print(fp, &CO2);
+  molecule_print(fp, &H2O);
+  rewind(fp);
+  CHK(shtr_isotope_metadata_load_stream(shtr, fp, NULL, &mdata3) == RES_OK);
+  CHK(fclose(fp) == 0);
+
+  CHK(shtr_isotope_metadata_compute_signature(mdata3, signature3) == RES_OK);
+  CHK(hash256_eq(signature3, signature1) != 0);
+
+  /* Configure metadata for CO2 only. The signature must differ */
+  CHK(fp = tmpfile());
+  fprintf(fp, "Molecule # Iso Abundance Q(296K) gj Molar Mass(g)\n");
+  molecule_print(fp, &CO2);
+  rewind(fp);
+  CHK(shtr_isotope_metadata_load_stream(shtr, fp, NULL, &mdata4) == RES_OK);
+  CHK(fclose(fp) == 0);
+
+  CHK(shtr_isotope_metadata_compute_signature(mdata4, signature4) == RES_OK);
+  CHK(hash256_eq(signature4, signature1) == 0);
+
+  /* Configure metadata for CO2 but print its isotopes in reverse order.
+   * This must not change the signature */
+  CHK(fp = tmpfile());
+  fprintf(fp, "Molecule # Iso Abundance Q(296K) gj Molar Mass(g)\n");
+  molecule_print_reverse(fp, &CO2);
+  rewind(fp);
+  CHK(shtr_isotope_metadata_load_stream(shtr, fp, NULL, &mdata5) == RES_OK);
+  CHK(fclose(fp) == 0);
+
+  CHK(shtr_isotope_metadata_compute_signature(mdata5, signature5) == RES_OK);
+  CHK(hash256_eq(signature5, signature4) != 0);
+
+  /* Release the metadata */
+  CHK(shtr_isotope_metadata_ref_put(mdata1) == RES_OK);
+  CHK(shtr_isotope_metadata_ref_put(mdata2) == RES_OK);
+  CHK(shtr_isotope_metadata_ref_put(mdata3) == RES_OK);
+  CHK(shtr_isotope_metadata_ref_put(mdata4) == RES_OK);
+  CHK(shtr_isotope_metadata_ref_put(mdata5) == RES_OK);
+}
+
+static void
 test_serialization(struct shtr* shtr)
 {
   struct shtr_isotope_metadata* mdata1 = NULL;
@@ -460,6 +557,7 @@ main(int argc, char** argv)
 
   test_load(shtr);
   test_load_failures(shtr);
+  test_signature(shtr);
   test_serialization(shtr);
   FOR_EACH(i, 1, argc) {
     test_load_file(shtr, argv[i]);