From fa2ec51988563e767fc7e1c194576e9d2d659aae Mon Sep 17 00:00:00 2001 From: "Shah, Ankur" Date: Sun, 22 Mar 2026 18:06:03 -0500 Subject: [PATCH] Fixed header issue with Fidelity csv --- README.md | 248 +++++++++++++++++- .../Providers/Fidelity/FidelityParser.cs | 4 +- src/CsvToOfx.Parsers/Shared/CsvRowReader.cs | 57 +++- tests/CsvToOfx.Parsers.Tests/UnitTest1.cs | 46 +++- 4 files changed, 336 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 6ee23d3..f43c7d3 100644 --- a/README.md +++ b/README.md @@ -1 +1,247 @@ -# csv2ofx \ No newline at end of file +# csv2ofx + +Convert brokerage CSV exports into OFX investment statements. + +The current CLI is focused on Fidelity brokerage CSV files and emits OFX 2.1.1 style investment transactions suitable for import into personal finance tools that accept investment OFX. + +## Current Support + +- Input provider: `fidelity` +- Input format: CSV +- Output format: OFX investment statement +- Current parser type: brokerage transactions + +## What It Already Handles + +- Fidelity CSV files with blank rows before the real header +- Fidelity CSV files with disclaimer/footer text after the transaction section +- Auto-detection of the actual Fidelity header row +- Automatic skipping of trailing footer/disclaimer sections after blank lines +- Duplicate-safe auto-generated output file names when `--ofx` is not supplied +- Stable generated `FITID` values from row content +- Optional start-date filtering +- Security IDs as either mapped `CUSIP` values or raw `ticker` values +- Embedded security list in the generated OFX +- Common Fidelity action normalization: + - buy / purchase / reinvestment + - sell + - dividend / interest / return of capital + - transfers + - fee / tax-style expenses + - stock splits / reverse splits + +## Requirements + +- .NET 8 SDK + +## Build + +```bash +dotnet build CsvToOfx.sln +``` + +## Run + +You can run the CLI directly from the repo: + +```bash +dotnet run --project src/CsvToOfx.Cli -- --csv /path/to/fidelity.csv --acct-id 123456789 +``` + +That will create an OFX file next to the CSV using the same base name, for example: + +```text +/path/to/fidelity.ofx +``` + +If that file already exists, the program will create: + +```text +/path/to/fidelity_1.ofx +/path/to/fidelity_2.ofx +... +``` + +## CLI Options + +The program currently supports both `--key=value` and `--key value`. + +### Required + +- `--csv` + - Path to the input CSV file +- `--acct-id` + - Account ID to embed in the generated OFX + +### Optional + +- `--source` + - Provider code + - Default: `fidelity` +- `--ofx` + - Output OFX path + - If omitted, the tool auto-generates a non-conflicting `.ofx` path next to the CSV +- `--start-date` + - Filters out rows before the given date +- `--security-id-type` + - Controls how securities are identified in OFX + - Supported values: + - `cusip` (default) + - `ticker` + +## Examples + +Basic conversion: + +```bash +dotnet run --project src/CsvToOfx.Cli -- \ + --csv ~/Downloads/fidelity.csv \ + --acct-id Z12345678 +``` + +Write to a specific OFX path: + +```bash +dotnet run --project src/CsvToOfx.Cli -- \ + --csv ~/Downloads/fidelity.csv \ + --acct-id Z12345678 \ + --ofx ~/Downloads/fidelity-import.ofx +``` + +Filter to transactions on or after a date: + +```bash +dotnet run --project src/CsvToOfx.Cli -- \ + --csv ~/Downloads/fidelity.csv \ + --acct-id Z12345678 \ + --start-date 2026-01-01 +``` + +Use ticker symbols instead of mapped CUSIPs: + +```bash +dotnet run --project src/CsvToOfx.Cli -- \ + --csv ~/Downloads/fidelity.csv \ + --acct-id Z12345678 \ + --security-id-type ticker +``` + +Using `--key=value` syntax: + +```bash +dotnet run --project src/CsvToOfx.Cli -- \ + --csv=~/Downloads/fidelity.csv \ + --acct-id=Z12345678 \ + --start-date=3/1/26 +``` + +## Accepted Date Formats + +`--start-date` and Fidelity row dates currently accept these formats: + +- `yyyy-MM-dd` +- `MM/dd/yyyy` +- `MM-dd-yyyy` +- `MM/dd/yy` +- `M/d/yy` +- `M/d/yyyy` +- `MM/d/yy` +- `M/dd/yy` +- `MM/d/yyyy` +- `M/dd/yyyy` + +Examples: + +- `2026-03-01` +- `03/01/2026` +- `3/1/26` + +## Security ID Behavior + +By default, the tool prefers `CUSIP` identifiers when a ticker can be mapped through the embedded security map. + +If you pass: + +```bash +--security-id-type ticker +``` + +the tool will keep securities as ticker-based IDs instead. + +If a symbol already looks like a CUSIP, it is treated as a CUSIP automatically. + +## OFX Output Behavior + +The generated OFX currently includes: + +- investment transaction list +- account ID from `--acct-id` +- generated `FITID` for each transaction +- security identifiers +- security list message set +- transaction memos when available +- normalized totals, units, and prices + +Transaction categories currently written: + +- `BUYSTOCK` +- `SELLSTOCK` +- `INCOME` +- `INVEXPENSE` +- `STOCKSPLIT` +- `INVBANKTRAN` + +## Fidelity Notes + +The Fidelity parser expects the standard transaction export columns and looks for a header containing: + +- `Run Date` +- `Action` +- `Symbol` +- `Description` +- `Type` +- `Price` +- `Quantity` +- `Commission` +- `Fees` +- `Amount` + +It now tolerates malformed exports where: + +- the first few rows are blank +- blank rows appear between the header and footer/disclaimer area +- trailing disclaimer text appears after the real transaction block + +## Defaults and Assumptions + +- default source is `fidelity` +- default output currency is `USD` +- generated output always includes the security list +- output subaccount values are currently written as `CASH` +- unsupported or unknown actions currently fall back to cash-transfer behavior +- amount parsing is normalized to absolute values + +## Exit Behavior + +The CLI exits with an error if: + +- `--csv` is missing +- `--acct-id` is missing +- `--source` is unknown + +## Tests + +Run tests with: + +```bash +dotnet test tests/CsvToOfx.Core.Tests/CsvToOfx.Core.Tests.csproj +dotnet test tests/CsvToOfx.Parsers.Tests/CsvToOfx.Parsers.Tests.csproj +``` + +## Roadmap Gaps + +Things clearly present in the codebase but not yet surfaced as broader product features: + +- parser abstraction supports multiple providers, but only Fidelity is currently registered +- parser capability flags support non-CSV and non-brokerage formats, but they are not implemented here yet +- subaccount inference service exists, but the OFX writer currently emits `CASH` subaccounts diff --git a/src/CsvToOfx.Parsers/Providers/Fidelity/FidelityParser.cs b/src/CsvToOfx.Parsers/Providers/Fidelity/FidelityParser.cs index 3ed0310..22cf1f7 100644 --- a/src/CsvToOfx.Parsers/Providers/Fidelity/FidelityParser.cs +++ b/src/CsvToOfx.Parsers/Providers/Fidelity/FidelityParser.cs @@ -21,7 +21,7 @@ public ParseResult Parse(RawStatement input, ParserContext ctx) var transactions = new List(); var securities = new Dictionary(StringComparer.OrdinalIgnoreCase); - foreach (var row in reader.ReadRows(input.Content)) + foreach (var row in reader.ReadRows(input.Content, RequiredFields)) { // skip empty rows var nonEmpty = row.Values.Count(v => !string.IsNullOrWhiteSpace(v)); @@ -97,4 +97,4 @@ public ParseResult Parse(RawStatement input, ParserContext ctx) _ => Get(row, "Description") }; } -} \ No newline at end of file +} diff --git a/src/CsvToOfx.Parsers/Shared/CsvRowReader.cs b/src/CsvToOfx.Parsers/Shared/CsvRowReader.cs index d186711..ce1569a 100644 --- a/src/CsvToOfx.Parsers/Shared/CsvRowReader.cs +++ b/src/CsvToOfx.Parsers/Shared/CsvRowReader.cs @@ -7,32 +7,30 @@ public sealed class CsvRowReader { private readonly CsvConfiguration _conf = new(CultureInfo.InvariantCulture) { - HasHeaderRecord = true, + HasHeaderRecord = false, DetectColumnCountChanges = false, BadDataFound = null, TrimOptions = TrimOptions.Trim }; - public IEnumerable> ReadRows(Stream csv) + public IEnumerable> ReadRows(Stream csv, IEnumerable? requiredHeaders = null) { using var reader = new StreamReader(csv); using var csvr = new CsvReader(reader, _conf); + string[]? headerRecord = null; var seenData = false; + var requiredHeaderSet = requiredHeaders is null + ? null + : new HashSet(requiredHeaders, StringComparer.OrdinalIgnoreCase); + while (csvr.Read()) { - var colCount = csvr.Parser.Count; - if (colCount == 0) continue; // skip empty physical rows - - var dict = csvr.GetRecord() as IDictionary; - if (dict is null) continue; - - // drop rows that don't match header shape (e.g., disclaimers/footers) - if (csvr.HeaderRecord is { Length: > 0 } && colCount < csvr.HeaderRecord.Length) + var record = csvr.Parser.Record; + if (record is null || record.Length == 0) continue; - var converted = dict.ToDictionary(k => k.Key, v => v.Value?.ToString()); - var nonEmpty = converted.Values.Count(v => !string.IsNullOrWhiteSpace(v)); + var nonEmpty = record.Count(v => !string.IsNullOrWhiteSpace(v)); if (nonEmpty == 0) { @@ -42,8 +40,41 @@ public sealed class CsvRowReader continue; // skip leading blank lines } + if (headerRecord is null) + { + if (!MatchesHeader(record, requiredHeaderSet)) + continue; + + headerRecord = record.Select(v => v?.Trim() ?? string.Empty).ToArray(); + continue; + } + + // drop rows that don't match header shape (e.g., disclaimers/footers) + if (record.Length < headerRecord.Length) + continue; + + var converted = new Dictionary(StringComparer.OrdinalIgnoreCase); + for (var i = 0; i < headerRecord.Length; i++) + converted[headerRecord[i]] = i < record.Length ? record[i] : null; + seenData = true; yield return converted; } } -} \ No newline at end of file + + private static bool MatchesHeader(string[] record, HashSet? requiredHeaders) + { + var normalized = record + .Where(v => !string.IsNullOrWhiteSpace(v)) + .Select(v => v.Trim()) + .ToHashSet(StringComparer.OrdinalIgnoreCase); + + if (normalized.Count == 0) + return false; + + if (requiredHeaders is null || requiredHeaders.Count == 0) + return true; + + return requiredHeaders.IsSubsetOf(normalized); + } +} diff --git a/tests/CsvToOfx.Parsers.Tests/UnitTest1.cs b/tests/CsvToOfx.Parsers.Tests/UnitTest1.cs index 8e3851b..690c260 100644 --- a/tests/CsvToOfx.Parsers.Tests/UnitTest1.cs +++ b/tests/CsvToOfx.Parsers.Tests/UnitTest1.cs @@ -1,10 +1,50 @@ +using System.Text; +using CsvToOfx.Core.Models; +using CsvToOfx.Core.Services; +using CsvToOfx.Parsers.Abstractions; +using CsvToOfx.Parsers.Providers.Fidelity; +using FluentAssertions; + namespace CsvToOfx.Parsers.Tests; -public class UnitTest1 +public class FidelityParserTests { [Fact] - public void Test1() + public void Parse_SkipsLeadingBlankRows_AndStopsBeforeFooterDisclaimer() { + const string csv = """ + + +Run Date,Action,Symbol,Description,Type,Price,Quantity,Commission,Fees,Amount +3/1/26,You bought,ABC,Alpha Inc,Common Stock,10.50,2,0,0,-21.00 +3/2/26,Dividend Received,XYZ,XYZ Dividend,Cash,0,0,0,0,5.25 + + +The data in this file is for informational purposes only. +Please review your official statement for complete details. +"""; + + using var stream = new MemoryStream(Encoding.UTF8.GetBytes(csv)); + + var parser = new FidelityParser(); + var ctx = new ParserContext + { + AccountId = "acct-1", + Institution = "fidelity", + CurrencyDefault = "USD", + DateParser = new DateParser(), + AmountParser = new AmountParser(), + FitIdGenerator = new FitIdGenerator(), + SubacctResolver = new SubacctResolver(), + SecurityResolver = new SecurityResolver(preferCusip: false) + }; + + var result = parser.Parse(new RawStatement("fidelity", stream, ".csv"), ctx); + result.Transactions.Should().HaveCount(2); + result.Transactions[0].Security!.Id.Should().Be("ABC"); + result.Transactions[0].Amount.Should().Be(21.00m); + result.Transactions[1].Security!.Id.Should().Be("XYZ"); + result.Transactions[1].Amount.Should().Be(5.25m); } -} \ No newline at end of file +}