From 8cc7c19a8e0d97d14533e43ba7dbcc95c6e96e5a Mon Sep 17 00:00:00 2001
From: Joel Capitao <jcapitao@redhat.com>
Date: Fri, 1 Jul 2022 08:41:04 +0200
Subject: [PATCH] Replace unicodecsv by standard csv module
unicodecsv has not been maintained for a while now [1].
It was preferred over the standard csv module because of
its Unicode support. Now that the Python 3 csv module [2]
supports Unicode natively, let's use it instead.
For more context, we hit issues while rebuilding
unicodecsv during the Fedora Python 3.11 mass rebuild [3][4].
Patch proposed upstream [5]
[1] https://github.com/jdunck/python-unicodecsv
[2] https://docs.python.org/3/library/csv.html
[3] https://copr.fedorainfracloud.org/coprs/g/python/python3.11/package/python-unicodecsv/
[4] https://bugzilla.redhat.com/show_bug.cgi?id=2021938
[5] https://github.com/turicas/rows/pull/367
---
rows/plugins/plugin_csv.py | 22 +++++++++++-----------
setup.py | 3 +--
2 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py
index 8a86c7d..124bf80 100644
--- a/rows/plugins/plugin_csv.py
+++ b/rows/plugins/plugin_csv.py
@@ -21,7 +21,7 @@ import sys
from io import BytesIO
import six
-import unicodecsv
+import csv
from rows.plugins.utils import (
create_table,
@@ -30,15 +30,15 @@ from rows.plugins.utils import (
serialize,
)
-sniffer = unicodecsv.Sniffer()
-unicodecsv.field_size_limit(sys.maxsize)
+sniffer = csv.Sniffer()
+csv.field_size_limit(sys.maxsize)
def fix_dialect(dialect):
if not dialect.doublequote and dialect.escapechar is None:
dialect.doublequote = True
- if dialect.quoting == unicodecsv.QUOTE_MINIMAL and dialect.quotechar == "'":
+ if dialect.quoting == csv.QUOTE_MINIMAL and dialect.quotechar == "'":
# Python csv's Sniffer seems to detect a wrong quotechar when
# quoting is minimal
dialect.quotechar = '"'
@@ -54,8 +54,8 @@ if six.PY2:
try:
dialect = sniffer.sniff(sample, delimiters=delimiters)
- except unicodecsv.Error: # Couldn't detect: fall back to 'excel'
- dialect = unicodecsv.excel
+ except csv.Error: # Couldn't detect: fall back to 'excel'
+ dialect = csv.excel
fix_dialect(dialect)
return dialect
@@ -90,8 +90,8 @@ elif six.PY3:
try:
dialect = sniffer.sniff(decoded, delimiters=delimiters)
- except unicodecsv.Error: # Couldn't detect: fall back to 'excel'
- dialect = unicodecsv.excel
+ except csv.Error: # Couldn't detect: fall back to 'excel'
+ dialect = csv.excel
fix_dialect(dialect)
return dialect
@@ -125,7 +125,7 @@ def import_from_csv(
sample=read_sample(fobj, sample_size), encoding=encoding
)
- reader = unicodecsv.reader(fobj, encoding=encoding, dialect=dialect)
+ reader = csv.reader(fobj, encoding=encoding, dialect=dialect)
meta = {"imported_from": "csv", "filename": filename, "encoding": encoding}
return create_table(reader, meta=meta, *args, **kwargs)
@@ -135,7 +135,7 @@ def export_to_csv(
table,
filename_or_fobj=None,
encoding="utf-8",
- dialect=unicodecsv.excel,
+ dialect=csv.excel,
batch_size=100,
callback=None,
*args,
@@ -160,7 +160,7 @@ def export_to_csv(
# TODO: may use `io.BufferedWriter` instead of `ipartition` so user can
# choose the real size (in Bytes) when to flush to the file system, instead
# number of rows
- writer = unicodecsv.writer(fobj, encoding=encoding, dialect=dialect)
+ writer = csv.writer(fobj, encoding=encoding, dialect=dialect)
if callback is None:
for batch in ipartition(serialize(table, *args, **kwargs), batch_size):
diff --git a/setup.py b/setup.py
index 502dba5..342b402 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,6 @@ from __future__ import unicode_literals
from setuptools import find_packages, setup
EXTRA_REQUIREMENTS = {
- "csv": ["unicodecsv"],
"cli": ["click", "requests", "requests-cache", "tqdm"],
"detect": ["file-magic"],
"html": ["lxml"], # apt: libxslt-dev libxml2-dev
@@ -35,7 +34,7 @@ EXTRA_REQUIREMENTS = {
"xpath": ["lxml"],
}
EXTRA_REQUIREMENTS["all"] = sum(EXTRA_REQUIREMENTS.values(), [])
-INSTALL_REQUIREMENTS = ["six", "pathlib"] + EXTRA_REQUIREMENTS["csv"]
+INSTALL_REQUIREMENTS = ["six", "pathlib"]
LONG_DESCRIPTION = """
No matter in which format your tabular data is: rows will import it,
automatically detect types and give you high-level Python objects so you can
--
2.35.3