Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

### Changed

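* NP Classifier pathway, superclass, and class are now stored and exposed as JSON arrays, so every label returned by the API is retained (previously only the first was kept). After deploying, re-run `php artisan coconut:npclassify --all --force` to refresh existing molecules; `--force` is required because molecules that already have NP Classifier data are skipped by default.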

## 1.0.0 (2025-11-27)


Expand Down
12 changes: 11 additions & 1 deletion app/Actions/Coconut/SearchMolecule.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use App\Models\Collection;
use App\Models\Molecule;
use App\Models\Organism;
use App\Support\NpClassifierResults;
use Illuminate\Database\QueryException;
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Support\Facades\DB;
Expand Down Expand Up @@ -398,7 +399,16 @@ private function buildFiltersStatement($filterMap)
$params[] = json_encode($dbs);
} else {
$filterValue = str_replace('+', ' ', $filterValue);
$sql .= "(LOWER(REGEXP_REPLACE({$filterMap[$filterKey]}, '\\s+', '-', 'g'))::TEXT ILIKE ?)";
$jsonbArrayFilters = NpClassifierResults::jsonbArrayFilterColumns();
if (isset($jsonbArrayFilters[$filterKey])) {
$column = $filterMap[$filterKey];
$sql .= "(EXISTS (
SELECT 1 FROM jsonb_array_elements_text({$column}) AS elem
WHERE LOWER(REGEXP_REPLACE(elem, '\\s+', '-', 'g')) ILIKE ?
))";
} else {
$sql .= "(LOWER(REGEXP_REPLACE({$filterMap[$filterKey]}, '\\s+', '-', 'g'))::TEXT ILIKE ?)";
}
$params[] = '%'.$filterValue.'%';
}
}
Expand Down
49 changes: 29 additions & 20 deletions app/Console/Commands/Classify.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use App\Models\Collection;
use App\Models\Molecule;
use App\Models\Properties;
use App\Support\NpClassifierResults;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Http;
Expand All @@ -16,7 +17,7 @@ class Classify extends Command
/**
* The name and signature of the console command.
*/
protected $signature = 'coconut:npclassify-old {collection_id?}';
protected $signature = 'coconut:npclassify-old {collection_id?} {--force : Re-classify molecules that already have NP Classifier data}';

/**
* The console command description.
Expand Down Expand Up @@ -61,9 +62,25 @@ public function handle()
$progressBar = $this->output->createProgressBar($totalCount);
$progressBar->start();

$force = $this->option('force');

// Process molecules in chunks.
$query->select(['molecules.id', 'molecules.canonical_smiles'])
->chunk(100, function ($mols) use ($progressBar) {
->when(! $force, function ($q) {
$q->whereHas('properties', function ($propertiesQuery) {
$propertiesQuery->whereNull('np_classifier_pathway');
});
})
->when($force, function ($q) {
$q->whereHas('properties', function ($propertiesQuery) {
$propertiesQuery->where(function ($classified) {
$classified->whereNotNull('np_classifier_pathway')
->orWhereNotNull('np_classifier_superclass')
->orWhereNotNull('np_classifier_class');
});
});
})
->chunk(100, function ($mols) use ($progressBar, $force) {
$data = [];
foreach ($mols as $mol) {
$id = $mol->id;
Expand All @@ -88,7 +105,7 @@ public function handle()
}

// Insert the data in one transaction.
$this->insertBatch($data);
$this->insertBatch($data, $force);
});

$progressBar->finish();
Expand Down Expand Up @@ -143,28 +160,20 @@ private function fetchFromApi(string $endpoint, string $smiles)
*
* @return void
*/
private function insertBatch(array $data)
private function insertBatch(array $data, bool $force = false)
{
DB::transaction(function () use ($data) {
DB::transaction(function () use ($data, $force) {
foreach ($data as $row) {
$properties = Properties::where('molecule_id', $row['id'])->whereNull('np_classifier_pathway')->first();
if ($properties) {
$properties['np_classifier_pathway'] = (isset($row['pathway_results'][0]) && ! empty($row['pathway_results'][0]))
? $row['pathway_results'][0]
: null;

$properties['np_classifier_superclass'] = (isset($row['superclass_results'][0]) && ! empty($row['superclass_results'][0]))
? $row['superclass_results'][0]
: null;
$query = Properties::where('molecule_id', $row['id']);

$properties['np_classifier_class'] = (isset($row['class_results'][0]) && ! empty($row['class_results'][0]))
? $row['class_results'][0]
: null;
if (! $force) {
$query->whereNull('np_classifier_pathway');
}

$properties['np_classifier_is_glycoside'] = (isset($row['isglycoside']) && $row['isglycoside'] !== '')
? filter_var($row['isglycoside'], FILTER_VALIDATE_BOOLEAN)
: null;
$properties = $query->first();

if ($properties) {
$properties->fill(NpClassifierResults::fromApiResponse($row));
$properties->save();
}
}
Expand Down
16 changes: 12 additions & 4 deletions app/Console/Commands/GenerateBubbleFrequencyCharts.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,18 @@ public function handle()
$query1 .= $first_table == 'properties' ? 'JOIN molecules m ON f.molecule_id=m.id ' : '';
$query1 .= "WHERE f.$first_column IS NOT NULL AND f.$first_column!='' ";
$query1 .= $first_table == 'properties' ? 'AND m.active = TRUE AND NOT (m.is_parent = TRUE AND m.has_variants = TRUE) ' : '';
$query2 = "SELECT DISTINCT s.$second_column col, COUNT(*) count FROM $second_table s ";
$query2 .= $second_table == 'properties' ? 'JOIN molecules m ON s.molecule_id=m.id ' : '';
$query2 .= "WHERE s.$second_column IS NOT NULL AND s.$second_column!='' ";
$query2 .= $second_table == 'properties' ? 'AND m.active = TRUE AND NOT (m.is_parent = TRUE AND m.has_variants = TRUE) ' : '';
if ($second_column === 'np_classifier_class') {
$query2 = "SELECT cls.col, COUNT(*) count FROM $second_table s ";
$query2 .= $second_table == 'properties' ? 'JOIN molecules m ON s.molecule_id=m.id ' : '';
$query2 .= ", LATERAL jsonb_array_elements_text(s.$second_column) AS cls(col) ";
$query2 .= "WHERE s.$second_column IS NOT NULL ";
$query2 .= $second_table == 'properties' ? 'AND m.active = TRUE AND NOT (m.is_parent = TRUE AND m.has_variants = TRUE) ' : '';
} else {
$query2 = "SELECT DISTINCT s.$second_column col, COUNT(*) count FROM $second_table s ";
$query2 .= $second_table == 'properties' ? 'JOIN molecules m ON s.molecule_id=m.id ' : '';
$query2 .= "WHERE s.$second_column IS NOT NULL AND s.$second_column!='' ";
$query2 .= $second_table == 'properties' ? 'AND m.active = TRUE AND NOT (m.is_parent = TRUE AND m.has_variants = TRUE) ' : '';
}

$query1 .= 'GROUP BY 1';
$query2 .= 'GROUP BY 1';
Expand Down
31 changes: 21 additions & 10 deletions app/Console/Commands/GeneratePropertiesJson.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace App\Console\Commands;

use App\Support\NpClassifierResults;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;

Expand Down Expand Up @@ -99,16 +100,26 @@ public function handle()
// For boolean types, set true and false as possible values
$columnData['values'] = [true, false];
} elseif ($type === 'select') {
// For text or JSONB types, fetch unique values
$uniqueValues = DB::table($tableName)
->select($column)
->distinct()
->pluck($column)
->filter()
->values()
->toArray();

$columnData['unique_values'] = $uniqueValues;
if ($dbType === 'jsonb' && in_array($column, NpClassifierResults::jsonbArrayPropertyColumns(), true)) {
$rows = DB::select("
SELECT DISTINCT value
FROM {$tableName},
LATERAL jsonb_array_elements_text({$column}) AS value
WHERE {$column} IS NOT NULL
ORDER BY value
");
$columnData['unique_values'] = array_map(static fn ($row) => $row->value, $rows);
} else {
$uniqueValues = DB::table($tableName)
->select($column)
->distinct()
->pluck($column)
->filter()
->values()
->toArray();

$columnData['unique_values'] = $uniqueValues;
}
}

// Determine the key for the JSON output
Expand Down
8 changes: 5 additions & 3 deletions app/Console/Commands/GenerateStackedBarNpClassifierData.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ public function handle()

// Execute the query from the user
$results = DB::select('
select c.title, np_classifier_class, count(np_classifier_class) as count
select c.title, cls.value as np_classifier_class, count(*) as count
from properties p
join molecules m on p.molecule_id=m.id
join collection_molecule cm on cm.molecule_id=m.id
join collections c on cm.collection_id=c.id
GROUP by 1,2
order by 1,3 desc
cross join lateral jsonb_array_elements_text(p.np_classifier_class) as cls(value)
where p.np_classifier_class is not null
group by 1, 2
order by 1, 3 desc
');

if (empty($results)) {
Expand Down
7 changes: 4 additions & 3 deletions app/Console/Commands/ImportNPClassifierOutput.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace App\Console\Commands;

use App\Models\Molecule;
use App\Support\NpClassifierResults;
use DB;
use Illuminate\Console\Command;
use Log;
Expand Down Expand Up @@ -84,9 +85,9 @@ private function insertBatch(array $data)
DB::transaction(function () use ($data) {
foreach ($data as $row) {
$properties = Molecule::where('identifier', $row['identifier'])->first()->properties()->get()[0];
$properties['np_classifier_pathway'] = $row['pathway_results'] == '' ? null : $row['pathway_results'];
$properties['np_classifier_superclass'] = $row['superclass_results'] == '' ? null : $row['superclass_results'];
$properties['np_classifier_class'] = $row['class_results'] == '' ? null : $row['class_results'];
$properties['np_classifier_pathway'] = NpClassifierResults::parseImportValue($row['pathway_results'] ?? null);
$properties['np_classifier_superclass'] = NpClassifierResults::parseImportValue($row['superclass_results'] ?? null);
$properties['np_classifier_class'] = NpClassifierResults::parseImportValue($row['class_results'] ?? null);
$properties['np_classifier_is_glycoside'] = $row['isglycoside'] == '' ? null : $row['isglycoside'];
$properties->save();
}
Expand Down
44 changes: 33 additions & 11 deletions app/Console/Commands/SubmissionsAutoProcess/ClassifyAuto.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ class ClassifyAuto extends Command
/**
* The name and signature of the console command.
*/
protected $signature = 'coconut:npclassify {collection_id? : The ID of the collection to process} {--all : Process all collections}';
protected $signature = 'coconut:npclassify
{collection_id? : The ID of the collection to process}
{--all : Process all collections}
{--force : Re-classify molecules that already have NP Classifier data}';

/**
* The console command description.
Expand All @@ -29,6 +32,7 @@ class ClassifyAuto extends Command
public function handle()
{
$collection_id = $this->argument('collection_id');
$force = $this->option('force');

if (! $collection_id && ! $this->option('all')) {
Log::error('Please specify either a collection_id or use --all flag');
Expand All @@ -47,27 +51,45 @@ public function handle()

$collectionLabel = $collection_id !== null ? "collection ID: {$collection_id}" : 'all collections';

Log::info("Classifying molecules using NPClassifier for {$collectionLabel}");
Log::info("Classifying molecules using NPClassifier for {$collectionLabel}".($force ? ' (force re-classify)' : ''));

// Use raw query to avoid ambiguous column issues
$conditions = '
if ($force) {
$conditions = '
WHERE molecules.active = true
AND (
properties.np_classifier_pathway IS NOT NULL
OR properties.np_classifier_superclass IS NOT NULL
OR properties.np_classifier_class IS NOT NULL
)
';
} else {
$conditions = '
WHERE molecules.active = true
AND properties.np_classifier_pathway IS NULL
AND properties.np_classifier_superclass IS NULL
AND properties.np_classifier_class IS NULL
AND properties.np_classifier_is_glycoside IS NULL
';
}

$bindings = [];
if ($collection_id !== null) {
$conditions = '
WHERE entries.collection_id = ?
AND molecules.active = true
AND properties.np_classifier_pathway IS NULL
$classifiedClause = $force
? '(
properties.np_classifier_pathway IS NOT NULL
OR properties.np_classifier_superclass IS NOT NULL
OR properties.np_classifier_class IS NOT NULL
)'
: 'properties.np_classifier_pathway IS NULL
AND properties.np_classifier_superclass IS NULL
AND properties.np_classifier_class IS NULL
AND properties.np_classifier_is_glycoside IS NULL
';
AND properties.np_classifier_is_glycoside IS NULL';

$conditions = "
WHERE entries.collection_id = ?
AND molecules.active = true
AND {$classifiedClause}
";
$bindings[] = $collection_id;
}

Expand Down Expand Up @@ -101,7 +123,7 @@ public function handle()
Log::info("Processing batch of {$moleculeCount} molecules for classification in {$collectionLabel}");

$batchJobs = [];
$batchJobs[] = new ClassifyMoleculeBatch($moleculeIds);
$batchJobs[] = new ClassifyMoleculeBatch($moleculeIds, $force);

Bus::batch($batchJobs)
->catch(function (Batch $batch, Throwable $e) use ($collectionLabel) {
Expand Down
29 changes: 12 additions & 17 deletions app/Jobs/ClassifyMoleculeAuto.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace App\Jobs;

use App\Models\Properties;
use App\Support\NpClassifierResults;
use Illuminate\Bus\Batchable;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
Expand All @@ -19,6 +20,8 @@ class ClassifyMoleculeAuto implements ShouldQueue

protected $molecule;

protected bool $force;

/**
* The step name for this job.
*/
Expand All @@ -32,9 +35,10 @@ class ClassifyMoleculeAuto implements ShouldQueue
/**
* Create a new job instance.
*/
public function __construct($molecule)
public function __construct($molecule, bool $force = false)
{
$this->molecule = $molecule;
$this->force = $force;
}

/**
Expand Down Expand Up @@ -125,25 +129,16 @@ private function fetchFromApi(string $endpoint, string $smiles)
*/
private function updateProperties(int $moleculeId, array $data): void
{
$properties = Properties::where('molecule_id', $moleculeId)->whereNull('np_classifier_pathway')->first();

if ($properties) {
$properties['np_classifier_pathway'] = (isset($data['pathway_results'][0]) && ! empty($data['pathway_results'][0]))
? $data['pathway_results'][0]
: null;
$query = Properties::where('molecule_id', $moleculeId);

$properties['np_classifier_superclass'] = (isset($data['superclass_results'][0]) && ! empty($data['superclass_results'][0]))
? $data['superclass_results'][0]
: null;

$properties['np_classifier_class'] = (isset($data['class_results'][0]) && ! empty($data['class_results'][0]))
? $data['class_results'][0]
: null;
if (! $this->force) {
$query->whereNull('np_classifier_pathway');
}

$properties['np_classifier_is_glycoside'] = (isset($data['isglycoside']) && $data['isglycoside'] !== '')
? filter_var($data['isglycoside'], FILTER_VALIDATE_BOOLEAN)
: null;
$properties = $query->first();

if ($properties) {
$properties->fill(NpClassifierResults::fromApiResponse($data));
$properties->save();
}
}
Expand Down
Loading
Loading