From 75cd4dc98b76a23e0ae5f6b243e6d4d700e88baa Mon Sep 17 00:00:00 2001 From: Leonard Percival Date: Tue, 7 Mar 2023 16:11:24 +0000 Subject: [PATCH 1/6] bind --- lib/roo/excelx.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb index 91ebc1e0..4c768d8d 100755 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -458,6 +458,7 @@ def process_zipfile_entries(entries) rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}" when /drawing([0-9]+).xml.rels$/ # Extracting drawing relationships to make images lists for each sheet + binding.pry nr = Regexp.last_match[1].to_i image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}" end From 4d9cecc931696f9ee8f22b13a8381d1ca4f510ed Mon Sep 17 00:00:00 2001 From: Leonard Percival Date: Wed, 8 Mar 2023 14:22:50 +0000 Subject: [PATCH 2/6] added in drawing support --- lib/roo/excelx.rb | 7 +++++-- lib/roo/excelx/drawing.rb | 42 +++++++++++++++++++++++++++++++++++++++ lib/roo/excelx/shared.rb | 3 ++- lib/roo/excelx/sheet.rb | 5 +++-- 4 files changed, 52 insertions(+), 5 deletions(-) create mode 100644 lib/roo/excelx/drawing.rb diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb index 4c768d8d..ee4cbe40 100755 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -25,8 +25,9 @@ class Excelx < Roo::Base require 'roo/excelx/coordinate' require 'roo/excelx/format' require 'roo/excelx/images' + require 'roo/excelx/drawing' - delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared + delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files, :drawing_files] => :@shared ExceedsMaxError = Class.new(StandardError) # initialization and opening of a spreadsheet file @@ -458,9 +459,11 @@ def process_zipfile_entries(entries) rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}" when /drawing([0-9]+).xml.rels$/ # Extracting drawing relationships to make images lists for each sheet - binding.pry nr = 
Regexp.last_match[1].to_i image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}" + when /drawing([0-9]+).xml$/ + nr = Regexp.last_match[1].to_i + drawing_files[nr - 1] = "#{@tmpdir}/roo_drawing#{nr}.xml" end entry.extract(path) if path diff --git a/lib/roo/excelx/drawing.rb b/lib/roo/excelx/drawing.rb new file mode 100644 index 00000000..b481add5 --- /dev/null +++ b/lib/roo/excelx/drawing.rb @@ -0,0 +1,42 @@ +require 'roo/excelx/extractor' + +module Roo + class Excelx + class Drawing < Excelx::Extractor + + # Returns: Hash { id1: cell_coordinates }, + # Example: { "rId1"=> [2,4], + # "rId2"=> [4,3], + # "rId3"=> [5,4] } + def list + @image_coordinates ||= extract_image_coordinates + end + + private + + def extract_image_coordinates + return {} unless doc_exists? + data = {} + + # Loop through all twoCellAnchor elements and extract the information + doc.xpath('//twoCellAnchor').each do |anchor| + # Extract the row and column numbers + from_col = anchor.at_xpath('./from/col').text.to_i + from_row = anchor.at_xpath('./from/row').text.to_i + to_col = anchor.at_xpath('./to/col').text.to_i + to_row = anchor.at_xpath('./to/row').text.to_i + + # Extract the rId attribute from the blip element + if anchor.at_xpath('./pic/blipFill/blip') + r_id = anchor.at_xpath('./pic/blipFill/blip')['embed'] + + # Store the extracted information in the data hash + data[r_id] = { from_col: from_col, from_row: from_row, to_col: to_col, to_row: to_row } + end + end + + data + end + end + end +end diff --git a/lib/roo/excelx/shared.rb b/lib/roo/excelx/shared.rb index bcd2c08b..1af806a8 100755 --- a/lib/roo/excelx/shared.rb +++ b/lib/roo/excelx/shared.rb @@ -4,7 +4,7 @@ class Excelx # reduce memory usage and reduce the number of objects being passed # to various inititializers. 
class Shared - attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files + attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files, :drawing_files def initialize(dir, options = {}) @dir = dir @comments_files = [] @@ -13,6 +13,7 @@ def initialize(dir, options = {}) @options = options @image_rels = [] @image_files = [] + @drawing_files = [] end def styles diff --git a/lib/roo/excelx/sheet.rb b/lib/roo/excelx/sheet.rb index 840a0533..990e3c62 100644 --- a/lib/roo/excelx/sheet.rb +++ b/lib/roo/excelx/sheet.rb @@ -4,9 +4,9 @@ class Excelx class Sheet extend Forwardable - delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared + delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :drawing_files, :image_files] => :@shared - attr_reader :images + attr_reader :images, :drawing def initialize(name, shared, sheet_index, options = {}) @name = name @@ -15,6 +15,7 @@ def initialize(name, shared, sheet_index, options = {}) @images = Images.new(image_rels[sheet_index]).list @rels = Relationships.new(rels_files[sheet_index]) @comments = Comments.new(comments_files[sheet_index]) + @drawing = Drawing.new(drawing_files[sheet_index]).list @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options) end From 81f94ebc99ce81bd6805a4f21d299b3666f01c65 Mon Sep 17 00:00:00 2001 From: Leonard Percival Date: Wed, 8 Mar 2023 14:30:46 +0000 Subject: [PATCH 3/6] improve documentation --- lib/roo/excelx/drawing.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/roo/excelx/drawing.rb b/lib/roo/excelx/drawing.rb index b481add5..059375a2 100644 --- a/lib/roo/excelx/drawing.rb +++ b/lib/roo/excelx/drawing.rb @@ -5,9 +5,10 @@ class Excelx class Drawing < Excelx::Extractor # Returns: Hash { id1: cell_coordinates }, - # Example: { "rId1"=> [2,4], - # "rId2"=> [4,3], - # "rId3"=> [5,4] } + # Example: 
{ "rId1"=> { from_col: 2, from_row: 3, to_col: 2, to_row: 3 }, + # "rId2"=> { from_col: 2, from_row: 4, to_col: 2, to_row: 4 }, + # "rId3"=> { from_col: 2, from_row: 5, to_col: 2, to_row: 5 } } + # def list @image_coordinates ||= extract_image_coordinates end @@ -26,7 +27,7 @@ def extract_image_coordinates to_col = anchor.at_xpath('./to/col').text.to_i to_row = anchor.at_xpath('./to/row').text.to_i - # Extract the rId attribute from the blip element + # Extract the rId attribute from the blip element if present, if not ignore anchor element if anchor.at_xpath('./pic/blipFill/blip') r_id = anchor.at_xpath('./pic/blipFill/blip')['embed'] From 09d0d31fc2b3aec6b52d2a6072941e8e77cea1f5 Mon Sep 17 00:00:00 2001 From: Leonard Percival Date: Wed, 15 Mar 2023 15:36:17 +0000 Subject: [PATCH 4/6] now accounts for one cell anchor images too --- lib/roo/excelx/drawing.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/roo/excelx/drawing.rb b/lib/roo/excelx/drawing.rb index 059375a2..f392103d 100644 --- a/lib/roo/excelx/drawing.rb +++ b/lib/roo/excelx/drawing.rb @@ -36,6 +36,21 @@ def extract_image_coordinates end end + # Loop through all oneCellAnchor elements and extract the information + doc.xpath('//oneCellAnchor').each do |anchor| + # Extract the row and column numbers + from_col = anchor.at_xpath('./from/col')&.text&.to_i + from_row = anchor.at_xpath('./from/row')&.text&.to_i + + # Extract the rId attribute from the blip element if present, if not ignore anchor element + if anchor.at_xpath('./pic/blipFill/blip') + r_id = anchor.at_xpath('./pic/blipFill/blip')['embed'] + + # Store the extracted information in the data hash + data[r_id] = { from_col: from_col, from_row: from_row } + end + end + data end end From d9f99d0354562ba31fb3c4b4ad1976b4a151250b Mon Sep 17 00:00:00 2001 From: Leonard Percival Date: Mon, 20 Mar 2023 09:31:31 +0000 Subject: [PATCH 5/6] added in support for repeated images --- lib/roo/excelx/drawing.rb | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/lib/roo/excelx/drawing.rb b/lib/roo/excelx/drawing.rb index f392103d..0c7f02ef 100644 --- a/lib/roo/excelx/drawing.rb +++ b/lib/roo/excelx/drawing.rb @@ -17,7 +17,7 @@ def list def extract_image_coordinates return {} unless doc_exists? - data = {} + data = Hash.new { |hash, key| hash[key] = [] } # Loop through all twoCellAnchor elements and extract the information doc.xpath('//twoCellAnchor').each do |anchor| @@ -32,7 +32,7 @@ def extract_image_coordinates r_id = anchor.at_xpath('./pic/blipFill/blip')['embed'] # Store the extracted information in the data hash - data[r_id] = { from_col: from_col, from_row: from_row, to_col: to_col, to_row: to_row } + data[r_id] << { from_col: from_col, from_row: from_row, to_col: to_col, to_row: to_row } end end @@ -47,7 +47,7 @@ def extract_image_coordinates r_id = anchor.at_xpath('./pic/blipFill/blip')['embed'] # Store the extracted information in the data hash - data[r_id] = { from_col: from_col, from_row: from_row } + data[r_id] << { from_col: from_col, from_row: from_row } end end From 719cef176b69d830d1a0640b42ddcd5e91516881 Mon Sep 17 00:00:00 2001 From: Leonard Percival Date: Mon, 20 Mar 2023 09:31:41 +0000 Subject: [PATCH 6/6] improved comments --- lib/roo/excelx/drawing.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/roo/excelx/drawing.rb b/lib/roo/excelx/drawing.rb index 0c7f02ef..65584ddc 100644 --- a/lib/roo/excelx/drawing.rb +++ b/lib/roo/excelx/drawing.rb @@ -4,10 +4,10 @@ module Roo class Excelx class Drawing < Excelx::Extractor - # Returns: Hash { id1: cell_coordinates }, - # Example: { "rId1"=> { from_col: 2, from_row: 3, to_col: 2, to_row: 3 }, - # "rId2"=> { from_col: 2, from_row: 4, to_col: 2, to_row: 4 }, - # "rId3"=> { from_col: 2, from_row: 5, to_col: 2, to_row: 5 } } + # Returns: Hash { id1: [cell_coordinates] }, + # Example: { "rId1"=> [{ from_col: 2, from_row: 3, to_col: 2, to_row: 3 }, { from_col: 6, from_row: 3, to_col: 7, 
to_row: 3 }], + # "rId2"=> [{ from_col: 2, from_row: 4, to_col: 2, to_row: 4 }], + # "rId3"=> [{ from_col: 2, from_row: 5, to_col: 2, to_row: 5 }] } # def list @image_coordinates ||= extract_image_coordinates