From 8cc7c19a8e0d97d14533e43ba7dbcc95c6e96e5a Mon Sep 17 00:00:00 2001
From: Joel Capitao <jcapitao@redhat.com>
Date: Fri, 1 Jul 2022 08:41:04 +0200
Subject: [PATCH] Replace unicodecsv by standard csv module

unicodecsv has not been maintained for a while now [1].
It was preferred over the standard csv module because
of its Unicode support. Now that the Python 3 csv
module [2] handles Unicode natively, let's use it.

For more context, we hit issues while rebuilding
unicodecsv during the Fedora Python 3.11 mass rebuild [3][4].

Patch proposed upstream [5].

[1] https://github.com/jdunck/python-unicodecsv
[2] https://docs.python.org/3/library/csv.html
[3] https://copr.fedorainfracloud.org/coprs/g/python/python3.11/package/python-unicodecsv/
[4] https://bugzilla.redhat.com/show_bug.cgi?id=2021938
[5] https://github.com/turicas/rows/pull/367
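
For reference, a minimal sketch of the API difference (the data
here is illustrative, not part of this patch): unicodecsv readers
consume byte streams and take an encoding argument, while the
standard csv module expects text streams, so binary file objects
get wrapped with io.TextIOWrapper:

    import csv
    from io import BytesIO, TextIOWrapper

    data = BytesIO("nome;área\nÁgua;10\n".encode("utf-8"))

    # unicodecsv (before): unicodecsv.reader(data, encoding="utf-8", delimiter=";")
    # standard csv (after): wrap the byte stream in a text layer first
    reader = csv.reader(TextIOWrapper(data, encoding="utf-8", newline=""), delimiter=";")
    print(list(reader))  # [['nome', 'área'], ['Água', '10']]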
---
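A quick way to exercise the patched plugin (illustrative sketch;
assumes a Python 3 environment with rows installed):

    import rows
    from io import BytesIO

    fobj = BytesIO("name,city\nJoão,São Paulo\n".encode("utf-8"))
    table = rows.import_from_csv(fobj)
    print(list(table))  # one Row namedtuple per CSV record
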
 rows/plugins/plugin_csv.py | 24 ++++++++++++------------
 setup.py                   |  3 +--
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py
index 8a86c7d..124bf80 100644
--- a/rows/plugins/plugin_csv.py
+++ b/rows/plugins/plugin_csv.py
@@ -21,7 +21,7 @@ import sys
-from io import BytesIO
+from io import BytesIO, TextIOWrapper
 
 import six
-import unicodecsv
+import csv
 
 from rows.plugins.utils import (
     create_table,
@@ -30,15 +30,15 @@ from rows.plugins.utils import (
     serialize,
 )
 
-sniffer = unicodecsv.Sniffer()
-unicodecsv.field_size_limit(sys.maxsize)
+sniffer = csv.Sniffer()
+csv.field_size_limit(sys.maxsize)
 
 
 def fix_dialect(dialect):
     if not dialect.doublequote and dialect.escapechar is None:
         dialect.doublequote = True
 
-    if dialect.quoting == unicodecsv.QUOTE_MINIMAL and dialect.quotechar == "'":
+    if dialect.quoting == csv.QUOTE_MINIMAL and dialect.quotechar == "'":
         # Python csv's Sniffer seems to detect a wrong quotechar when
         # quoting is minimal
         dialect.quotechar = '"'
@@ -54,8 +54,8 @@ if six.PY2:
         try:
             dialect = sniffer.sniff(sample, delimiters=delimiters)
 
-        except unicodecsv.Error:  # Couldn't detect: fall back to 'excel'
-            dialect = unicodecsv.excel
+        except csv.Error:  # Couldn't detect: fall back to 'excel'
+            dialect = csv.excel
 
         fix_dialect(dialect)
         return dialect
@@ -90,8 +90,8 @@ elif six.PY3:
         try:
             dialect = sniffer.sniff(decoded, delimiters=delimiters)
 
-        except unicodecsv.Error:  # Couldn't detect: fall back to 'excel'
-            dialect = unicodecsv.excel
+        except csv.Error:  # Couldn't detect: fall back to 'excel'
+            dialect = csv.excel
 
         fix_dialect(dialect)
         return dialect
@@ -125,7 +125,7 @@ def import_from_csv(
             sample=read_sample(fobj, sample_size), encoding=encoding
         )
 
-    reader = unicodecsv.reader(fobj, encoding=encoding, dialect=dialect)
+    reader = csv.reader(TextIOWrapper(fobj, encoding=encoding, newline=""), dialect=dialect)
 
     meta = {"imported_from": "csv", "filename": filename, "encoding": encoding}
     return create_table(reader, meta=meta, *args, **kwargs)
@@ -135,7 +135,7 @@ def export_to_csv(
     table,
     filename_or_fobj=None,
     encoding="utf-8",
-    dialect=unicodecsv.excel,
+    dialect=csv.excel,
     batch_size=100,
     callback=None,
     *args,
@@ -160,7 +160,7 @@ def export_to_csv(
     # TODO: may use `io.BufferedWriter` instead of `ipartition` so user can
     # choose the real size (in Bytes) when to flush to the file system, instead
     # number of rows
-    writer = unicodecsv.writer(fobj, encoding=encoding, dialect=dialect)
+    writer = csv.writer(TextIOWrapper(fobj, encoding=encoding, newline="", write_through=True), dialect=dialect)
 
     if callback is None:
         for batch in ipartition(serialize(table, *args, **kwargs), batch_size):
diff --git a/setup.py b/setup.py
index 502dba5..342b402 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,6 @@ from __future__ import unicode_literals
 from setuptools import find_packages, setup
 
 EXTRA_REQUIREMENTS = {
-    "csv": ["unicodecsv"],
     "cli": ["click", "requests", "requests-cache", "tqdm"],
     "detect": ["file-magic"],
     "html": ["lxml"],  # apt: libxslt-dev libxml2-dev
@@ -35,7 +34,7 @@ EXTRA_REQUIREMENTS = {
     "xpath": ["lxml"],
 }
 EXTRA_REQUIREMENTS["all"] = sum(EXTRA_REQUIREMENTS.values(), [])
-INSTALL_REQUIREMENTS = ["six", "pathlib"] + EXTRA_REQUIREMENTS["csv"]
+INSTALL_REQUIREMENTS = ["six", "pathlib"]
 LONG_DESCRIPTION = """
 No matter in which format your tabular data is: rows will import it,
 automatically detect types and give you high-level Python objects so you can
-- 
2.35.3