commit 2c37705a0739bdfbc1c77fd4df177ff7eda1277f
parent 0d1212b9bba3dfa7589772b0beca126530e1a852
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Fri, 26 Jun 2026 16:03:22 +0200
Improve detection of duplicate molecules
Verifies whether the molecule name matches the expected name for its
identifier. Thus, once the analysis is successfully completed, if a
molecule is already registered under the corresponding identifier, it
means it is a duplicate.
Note that previously, when a conflict was detected, accessing a NULL
pointer caused a segmentation fault. This commit not only makes the
error message clearer but also corrects this invalid read.
Diffstat:
2 files changed, 43 insertions(+), 14 deletions(-)
diff --git a/src/shtr_isotope_metadata.c b/src/shtr_isotope_metadata.c
@@ -155,7 +155,6 @@ flush_molecule
{
size_t nisotopes = 0;
size_t ientry = SIZE_MAX;
- size_t* pimolecule = NULL;
res_T res = RES_OK;
ASSERT(metadata && molecule && MOLECULE_IS_VALID(molecule));
@@ -195,16 +194,9 @@ flush_molecule
/* Register the molecule */
if(metadata->molid2idx[molecule->id] >= 0) {
- const struct molecule* molecule2 = NULL;
- molecule2 = darray_molecule_cdata_get(&metadata->molecules) + *pimolecule;
- ERROR(metadata->shtr,
- "%s: cannot register the %s molecule. "
- "The %s molecule has the same identifier %i.\n",
- txtrdr_get_name(txtrdr),
- str_cget(&molecule->name),
- str_cget(&molecule2->name),
- molecule->id);
- res = RES_OK;
+ ERROR(metadata->shtr, "%s: the molecule %s appears several times.\n",
+ txtrdr_get_name(txtrdr), str_cget(&molecule->name));
+ res = RES_BAD_ARG;
goto error;
}
ASSERT((size_t)((int)ientry) == ientry);
@@ -246,7 +238,7 @@ parse_molecule
res = str_set(&molecule->name, name);
if(res != RES_OK) {
ERROR(metadata->shtr,
- "%s:%lu: error seting the molecule name `%s' -- %s.\n",
+ "%s:%lu: error setting the molecule name `%s' -- %s.\n",
txtrdr_get_name(txtrdr), (unsigned long)txtrdr_get_line_num(txtrdr),
name, res_to_cstr(res));
goto error;
@@ -263,14 +255,33 @@ parse_molecule
id[len-1] = '\0'; /* Rm trailing parenthesis */
res = cstr_to_int(id+1/*Rm leading parenthesis*/, &molecule->id);
- if(res != RES_OK || !MOLECULE_IS_VALID(molecule)) {
+ if(res != RES_OK) {
id[len-1] = ')'; /* Re-add the trailing parenthesis */
- ERROR(metadata->shtr, "%s:%lu: invalid molecule identifier `%s'.\n",
+ ERROR(metadata->shtr,
+ "%s:%lu: invalid molecule identifier `%s'.\n",
txtrdr_get_name(txtrdr), (unsigned long)txtrdr_get_line_num(txtrdr), id);
res = RES_BAD_ARG;
goto error;
}
+ if(!MOLECULE_IS_VALID(molecule)) {
+ ERROR(metadata->shtr,
+ "%s:%lu: the `%s (%d)' molecule is not supported.\n",
+ txtrdr_get_name(txtrdr), (unsigned long)txtrdr_get_line_num(txtrdr),
+ name, molecule->id);
+ res = RES_BAD_ARG;
+ goto error;
+ }
+
+ if(strcmp(name, shtr_molecule_cstr(molecule->id))) { /* Name must match the one expected for this id */
+ ERROR(metadata->shtr,
+ "%s:%lu: the molecule %d is named `%s' instead of `%s'.\n",
+ txtrdr_get_name(txtrdr), (unsigned long)txtrdr_get_line_num(txtrdr),
+ molecule->id, name, shtr_molecule_cstr(molecule->id));
+ res = RES_BAD_ARG;
+ goto error;
+ }
+
tk = strtok_r(NULL, " \t", &tk_ctx);
if(tk) {
WARN(metadata->shtr, "%s:%lu: unexpected text `%s'.\n",
diff --git a/src/test_shtr_isotope_metadata.c b/src/test_shtr_isotope_metadata.c
@@ -559,6 +559,24 @@ test_load_failures(struct shtr* shtr)
rewind(fp);
CHK(shtr_isotope_metadata_load_stream(shtr, fp, NULL, &mdata) == RES_MEM_ERR);
CHK(fclose(fp) == 0);
+
+ /* Duplicate molecule */
+ CHK(fp = tmpfile());
+ fprintf(fp, "Molecule # Iso Abundance Q(296K) gj Molar Mass(g)\n");
+ molecule_print(fp, &H2O);
+ molecule_print(fp, &H2O);
+ rewind(fp);
+ CHK(shtr_isotope_metadata_load_stream(shtr, fp, NULL, &mdata) == RES_BAD_ARG);
+ CHK(fclose(fp) == 0);
+
+ /* Invalid molecule name */
+ CHK(fp = tmpfile());
+ fprintf(fp, "Comment line\n");
+ fprintf(fp, "H_2O (1)\n");
+ isotope_print(fp, &H2O_isotopes[0]);
+ rewind(fp);
+ CHK(shtr_isotope_metadata_load_stream(shtr, fp, NULL, &mdata) == RES_BAD_ARG);
+ CHK(fclose(fp) == 0);
}
static void