diff --git a/app/Actions/Coconut/SearchMolecule.php b/app/Actions/Coconut/SearchMolecule.php
index e6964b49..3b47ca1b 100644
--- a/app/Actions/Coconut/SearchMolecule.php
+++ b/app/Actions/Coconut/SearchMolecule.php
@@ -120,9 +120,9 @@ private function determineQueryType($query)
             return 'parttialinchikey';
         }
 
-        // Check for molecular formula (must match pattern and not contain SMILES-specific characters)
-        // Molecular formulas contain only element symbols and numbers (e.g., C6H12O6, H2O)
-        if (preg_match('/^([A-Z][a-z]?\d*)+$/', $query) && ! preg_match('/[()@\[\]\/\\\\=#\-+]/', $query)) {
+        // Molecular formulas use condensed notation (e.g., C6H12O6, H2O), not SMILES chains
+        // like C1CCCCC1, CCO or COC where the same element appears in more than one token.
+        if ($this->looksLikeMolecularFormula($query)) {
             return 'molecularformula';
         }
 
@@ -139,6 +139,29 @@ private function determineQueryType($query)
         return 'text';
     }
 
+    /**
+     * True when the query looks like a Hill-order molecular formula, not a SMILES string.
+     *
+     * A formula is a run of element tokens (symbol plus optional count) in which
+     * every element appears at most once, e.g. C6H12O6 or H2O. SMILES chains such
+     * as CCO, C1CCCCC1 or COC repeat an element and are rejected.
+     */
+    private function looksLikeMolecularFormula(string $query): bool
+    {
+        // The anchored pattern admits only letters and digits, so SMILES punctuation
+        // (brackets, bond and charge symbols) can never reach the token scan below.
+        if (preg_match('/^(?:[A-Z][a-z]?\d*)+$/', $query) !== 1) {
+            return false;
+        }
+
+        // Capture group 1 collects the element symbol of every token in order.
+        preg_match_all('/([A-Z][a-z]?)\d*/', $query, $matches);
+        $elements = $matches[1];
+
+        // A symbol repeated anywhere (not only in adjacent tokens) means a chain of atoms.
+        return count($elements) === count(array_unique($elements));
+    }
+
     /**
      * Apply status filter to SQL query.
      * Only adds a filter when status is 'approved' or 'revoked'.
diff --git a/resources/views/livewire/welcome.blade.php b/resources/views/livewire/welcome.blade.php
index 2b38b6c1..bf659e5d 100644
--- a/resources/views/livewire/welcome.blade.php
+++ b/resources/views/livewire/welcome.blade.php
@@ -206,7 +206,7 @@ class="h-full w-full border-transparent py-2 pl-8 pr-3 text-sm text-gray-900 pla