-- Module:CsvUtils

---
-- Module for retrieving and returning data stored in CSV format in wiki
-- templates. There can be documentation in the template; it is removed
-- automatically provided it is enclosed in a <noinclude> block. The data
-- must be wrapped in a <pre> block to protect line breaks. The <pre> tags
-- are also removed. This module also provides a parsing method to convert
-- well-structured CSV data into a Lua table.
--
-- NOTE(review): the tag names above were missing from the copy of this
-- module under review and are reconstructed from context (<noinclude> for
-- page documentation, <pre> for preserving line breaks) -- confirm against
-- the data pages.
--
-- @module CsvUtils
local CsvUtils = {}

---
-- Loads the raw CSV data from a wiki page (usually a template).
--
-- Everything enclosed in <noinclude> tags (including the tags) is stripped
-- from the page content, then the text inside the <pre> block that protects
-- the line breaks of the data is returned without the enclosing tags.
--
-- NOTE(review): the tag names in the two patterns below were missing from the
-- copy of this module under review. <noinclude> and <pre> are reconstructed
-- from the module description -- confirm against the data pages.
--
-- This function never returns nil: every failure raises an error that names
-- the offending page.
--
-- @param dataPageName (string) The name of the page containing the CSV data,
-- including the namespace, like this: 'Template:Workshops_Recipes_csv'
-- @return string The raw CSV data.
-- @raise If the page does not exist, has no content, cannot be preprocessed,
-- or contains no CSV data once the surrounding markup is removed.
--
function CsvUtils.extractCSV(dataPageName)

	-- Load the page and verify something returned correctly. If the page was
	-- not found, the title object is nil or reports that it does not exist.
	local csvPage = mw.title.new(dataPageName)
	if not csvPage or not csvPage.exists then
		error("Could not find data page: " .. dataPageName)
	end

	-- Get the content of the page. An empty string is treated as a problem
	-- just like a missing page.
	local pageContent = csvPage:getContent()
	if not pageContent or pageContent == "" then
		error("Data page content does not exist: " .. dataPageName)
	end

	-- Expand templates and apply wikimarkup as a sanity check on the page.
	-- The result is only validated; the extraction below deliberately works
	-- on the raw page content.
	-- NOTE(review): presumably the raw content is used because preprocessing
	-- would alter the protected block -- confirm.
	local frame = mw.getCurrentFrame()
	local preprocessedText = frame:preprocess(pageContent)
	if not preprocessedText or preprocessedText == "" then
		error("Data page content could not be preprocessed " .. dataPageName)
	end

	-- Remove everything within and including the <noinclude> tags. Since
	-- this is still mediawiki content, use mw.ustring instead of the Lua
	-- string library. Only the first return value (the new string) is kept.
	local trimmedText = mw.ustring.gsub(pageContent,
		"<noinclude>.-</noinclude>", "")
	if trimmedText == "" then
		error("Trimming the data page content resulted in no csv data left: "
			.. dataPageName)
	end

	-- Extract the data from the enclosing <pre> block, leaving only the csv
	-- data. match returns nil when there is no such block, so nil has to be
	-- rejected along with the empty string.
	local rawCSV = trimmedText:match("<pre>(.-)</pre>")
	if not rawCSV or rawCSV == "" then
		error("Trimming the data page content resulted in no csv data left: "
			.. dataPageName)
	end

	return rawCSV
end -- extractCSV function

---
-- Converts a CSV data string into a Lua table. The CSV data must have a
-- header row with field names. Every following line is one record, and each
-- column contains specific data for that record.
--
-- Rows are stored as plain sequences, so fields are accessed by index. To
-- access a field by its header name, translate the name with the lookup
-- table returned as the second value:
--     local data, lookup = CsvUtils.luaTableFromCSV(csv)
--     local header, rows = data[1], data[2]
--     local id = rows[1][lookup["id"]]
--
-- Requirements on the input, as implemented here:
--   * header cells must not be empty (empty header cells are skipped);
--   * every data row must end in a comma, because only text followed by a
--     comma is recognised as a field.
--
-- Field values are returned in the encoded form produced by encodeCSV
-- (double quotes as &quot;, and commas and newlines inside quoted text as
-- &comma; and &#10;).
--
-- @function luaTableFromCSV
-- @param csvString (string) The CSV data as a string.
-- @return table A two-element table: [1] is the header row (sequence of
-- field names), [2] is the sequence of data rows (each a sequence of
-- strings).
-- @return table Lookup table mapping each header name to its column index.
-- @raise If the data is empty, the header row has no fields, a data row
-- yields no fields, or there are no data rows.
--
function CsvUtils.luaTableFromCSV(csvString)

	-- Header row, data rows (table of rows), and the header indices stored
	-- for constant time lookup.
	local header = {}
	local rows = {}
	local headerLookup = {}

	-- Start by cleaning (encoding) the string up so it can be processed
	-- without getting confused by special characters like commas or newlines
	-- within quotations. Kept local so nothing leaks into the global table.
	local encodedString = encodeCSV(csvString)
	if encodedString == "" then
		error("Encoding the csv data string removed all information.")
	end

	-- Separate the csv data into lines by matching runs of anything that is
	-- not a carriage return or newline; blank lines are skipped implicitly.
	for line in encodedString:gmatch("[^\r\n]+") do

		-- The header is built exactly once: while it is still empty, the
		-- current line is the header row; every later line is a data row.
		if #header == 0 then

			-- Split on commas. A plus is used because header cells are
			-- never empty.
			for field in line:gmatch("([^,]+)") do
				local column = #header + 1
				header[column] = field
				headerLookup[field] = column
			end

			if #header == 0 then
				error("Adding fields was not successful to header row.")
			end
		else
			-- Build each row separately as a new table.
			local row = {}

			-- Split on commas again, this time allowing zero-length cells.
			-- Each field must be terminated by a comma, which is why the
			-- rows must end in a comma.
			for field in line:gmatch("([^,]*),") do
				row[#row + 1] = field
			end

			-- Report the number of the row that failed, not a field index.
			if #row == 0 then
				error("Adding fields was not successful to row number: "
					.. (#rows + 1))
			end

			rows[#rows + 1] = row
		end
	end

	if #rows == 0 then
		error("There are no rows to add to the table.")
	end

	-- Top level table: header first, then the table of rows.
	return { header, rows }, headerLookup
end -- luaTableFromCSV function

---
-- Encodes a CSV data string so that it can safely be split on newlines and
-- commas. Each quoted section (including its bounding quotation marks) is
-- first handed to encodeQuotations, then every double quote in the result is
-- replaced with its HTML character code. Apostrophes are left as they are;
-- encoding them is currently disabled.
--
-- @param stringToEncode The input string to be processed.
-- @return The modified string with special characters replaced.
-- @raise If the encoded result is an empty string.
--
function encodeCSV(stringToEncode)

	-- First pass: rewrite the content of every quoted section. Second pass:
	-- swap the double quotes themselves for their HTML code. Chaining the
	-- calls keeps only the string result of each gsub.
	local result = stringToEncode
		:gsub("(\".-\")", encodeQuotations)
		:gsub("\"", "&quot;")

	if result == "" then
		error("Encoding double quotation marks resulted in no data left.")
	end

	return result
end

---
-- Helper used on quoted sections of the csv data string. Replaces each run
-- of carriage returns and newlines with a single &#10; and each comma with
-- &comma;, so that neither is mistaken for a record or field separator.
--
-- @param stringToEncode The input string to be processed.
-- @return The modified string with newlines and commas replaced.
-- @raise If either replacement step leaves an empty string.
function encodeQuotations(stringToEncode)

	-- Step one: newlines within this matched quotation.
	local withoutNewlines = stringToEncode:gsub('[\r\n]+', '&#10;')
	if withoutNewlines == "" then
		error("Encoding newlines inside quoted descriptions resulted in no data left.")
	end

	-- Step two: commas within this matched quotation.
	local withoutCommas = withoutNewlines:gsub(",", "&comma;")
	if withoutCommas == "" then
		error("Encoding commas inside quoted descriptions resulted in no data left.")
	end

	return withoutCommas
end

-- Hand the module table, with its public functions, back to whoever loads
-- this module.
return CsvUtils