Compare commits

...

1 Commits

Author SHA1 Message Date
Benjamin Otte
b24a3736dc gdk: Add GNewLineConverter
A converter that converts between different line endings - \r, \n and
\r\n.

This should probably live in glib, but for now it lives here.
2021-08-16 07:24:42 +02:00
5 changed files with 675 additions and 2 deletions

345
gdk/gnewlineconverter.c Normal file
View File

@@ -0,0 +1,345 @@
/* GIO - GLib Input, Output and Streaming Library
*
* Copyright (C) 2021 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General
* Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
* Author: Benjamin Otte <otte@gnome.org>
*/
#include "config.h"
#include "gnewlineconverter.h"
#include "gdkintl.h"
/* Property identifiers of GNewlineConverter. PROP_0 is only a placeholder
 * so that the real property ids start at 1. */
enum
{
  PROP_0 = 0,
  PROP_FROM_NEWLINE = 1,
  PROP_TO_NEWLINE = 2
};
/**
* SECTION:gnewlineconverter
* @short_description: Convert between newlines
* @include: gio/gio.h
*
* #GNewlineConverter is an implementation of #GConverter that converts
* between different line endings (CR, LF and CR LF). This is useful when
* exchanging data streams between systems that use different line-ending
* conventions, such as Windows and UNIX.
*/
/**
* GNewlineConverter:
*
* Conversions of line endings.
*/
/* Instance structure: besides the parent object it only stores the two
 * newline styles, so no conversion state is kept between calls. */
struct _GNewlineConverter
{
/* parent instance, kept as the first member */
GObject parent_instance;
/* line-ending style searched for in the input ("from-newline" property) */
GDataStreamNewlineType from;
/* line-ending style written to the output ("to-newline" property) */
GDataStreamNewlineType to;
};
/* Forward declaration: the interface initializer is referenced by the type
 * definition below and implemented further down in this file. */
static void g_newline_converter_iface_init (GConverterIface *iface);
/* Defines the GNewlineConverter type as a GObject subclass that implements
 * the GConverter interface via g_newline_converter_iface_init(). */
G_DEFINE_TYPE_WITH_CODE (GNewlineConverter, g_newline_converter, G_TYPE_OBJECT,
G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
g_newline_converter_iface_init))
/*
 * g_newline_converter_convert:
 * @converter: the #GNewlineConverter, passed as a #GConverter
 * @inbuf: input bytes
 * @inbuf_size: number of bytes available at @inbuf
 * @outbuf: buffer that receives the converted bytes
 * @outbuf_size: capacity of @outbuf in bytes
 * @flags: conversion flags; %G_CONVERTER_INPUT_AT_END and
 *   %G_CONVERTER_FLUSH are inspected
 * @bytes_read: set to the number of bytes consumed from @inbuf
 *   (only written when no error is returned)
 * @bytes_written: set to the number of bytes stored in @outbuf
 *   (only written when no error is returned)
 * @error: return location for a #GError
 *
 * GConverter::convert implementation. Copies @inbuf to @outbuf and replaces
 * every line break of the "from" style with the "to" style.
 *
 * - If both styles are equal, or the target is
 *   %G_DATA_STREAM_NEWLINE_TYPE_ANY, the data is copied unchanged.
 * - While the end of input has not been signalled and the source style is
 *   CR LF or ANY, a trailing '\r' is left unconsumed: it cannot be decided
 *   yet whether it starts a "\r\n" pair. The caller has to pass it again
 *   together with more data.
 * - For a CR LF source, lone '\r' and lone '\n' are ordinary data.
 * - A CR LF target line break is only written if both bytes fit.
 *
 * Returns: %G_CONVERTER_FINISHED when %G_CONVERTER_INPUT_AT_END is set and
 *   all input was consumed (including the case of an empty input),
 *   %G_CONVERTER_FLUSHED when %G_CONVERTER_FLUSH is set,
 *   %G_CONVERTER_CONVERTED otherwise, or %G_CONVERTER_ERROR with
 *   %G_IO_ERROR_PARTIAL_INPUT when no input byte could be consumed.
 */
static GConverterResult
g_newline_converter_convert (GConverter       *converter,
                             const void       *inbuf,
                             gsize             inbuf_size,
                             void             *outbuf,
                             gsize             outbuf_size,
                             GConverterFlags   flags,
                             gsize            *bytes_read,
                             gsize            *bytes_written,
                             GError          **error)
{
  GNewlineConverter *self = G_NEWLINE_CONVERTER (converter);
  GConverterResult ret;
  const char *inbufp, *inbuf_end;
  char *outbufp, *outbuf_end;
  gsize size;

  inbufp = inbuf;
  inbuf_end = inbufp + inbuf_size;
  outbufp = outbuf;
  outbuf_end = outbufp + outbuf_size;

  /* shortcut for the easy case, avoids special casing later */
  if (self->from == self->to ||
      self->to == G_DATA_STREAM_NEWLINE_TYPE_ANY)
    {
      size = MIN (inbuf_size, outbuf_size);

      if (size > 0)
        memcpy (outbufp, inbufp, size);
      inbufp += size;
      outbufp += size;

      goto done;
    }

  /* ignore \r at end of input when we care about \r\n: without the next
   * byte we cannot tell a lone \r from the start of \r\n */
  if ((flags & G_CONVERTER_INPUT_AT_END) == 0 &&
      (self->from == G_DATA_STREAM_NEWLINE_TYPE_CR_LF ||
       self->from == G_DATA_STREAM_NEWLINE_TYPE_ANY) &&
      inbufp < inbuf_end &&
      inbuf_end[-1] == '\r')
    inbuf_end--;

  while (inbufp < inbuf_end && outbufp < outbuf_end)
    {
      const char *linebreak;

      /* locate the next line break of the source style, NULL if none */
      switch (self->from)
        {
        case G_DATA_STREAM_NEWLINE_TYPE_LF:
          linebreak = memchr (inbufp, '\n', inbuf_end - inbufp);
          break;

        case G_DATA_STREAM_NEWLINE_TYPE_CR:
          linebreak = memchr (inbufp, '\r', inbuf_end - inbufp);
          break;

        case G_DATA_STREAM_NEWLINE_TYPE_CR_LF:
          /* skip every \r that is not directly followed by \n */
          linebreak = inbufp;
          while ((linebreak = memchr (linebreak, '\r', inbuf_end - linebreak)))
            {
              if (inbuf_end - linebreak > 1 &&
                  linebreak[1] == '\n')
                break;
              linebreak++;
            }
          break;

        case G_DATA_STREAM_NEWLINE_TYPE_ANY:
          {
            /* pick whichever of \r and \n comes first; the \r search is
             * limited to the bytes in front of the first \n */
            const char *lf = memchr (inbufp, '\n', inbuf_end - inbufp);

            linebreak = memchr (inbufp, '\r', (lf ? lf : inbuf_end) - inbufp);
            if (linebreak == NULL)
              linebreak = lf;
            break;
          }

        default:
          g_assert_not_reached();
          break;
        }

      /* copy the part without linebreaks */
      if (linebreak)
        size = linebreak - inbufp;
      else
        size = inbuf_end - inbufp;
      size = MIN (outbuf_end - outbufp, size);
      if (size)
        {
          memcpy (outbufp, inbufp, size);
          outbufp += size;
          inbufp += size;
        }

      if (inbufp >= inbuf_end || outbufp >= outbuf_end)
        break;

      /* We should have broken above */
      g_assert (linebreak != NULL);
      g_assert (inbufp == linebreak);

      /* emit the line break in the target style */
      switch (self->to)
        {
        case G_DATA_STREAM_NEWLINE_TYPE_LF:
          *outbufp++ = '\n';
          break;

        case G_DATA_STREAM_NEWLINE_TYPE_CR:
          *outbufp++ = '\r';
          break;

        case G_DATA_STREAM_NEWLINE_TYPE_CR_LF:
          /* never write half a line break; the source line break stays
           * unconsumed and is handled by the next call */
          if (outbuf_end - outbufp < 2)
            goto done;
          *outbufp++ = '\r';
          *outbufp++ = '\n';
          break;

        case G_DATA_STREAM_NEWLINE_TYPE_ANY:
        default:
          g_assert_not_reached();
          break;
        }

      /* consume the line break from the input */
      switch (self->from)
        {
        case G_DATA_STREAM_NEWLINE_TYPE_LF:
        case G_DATA_STREAM_NEWLINE_TYPE_CR:
          inbufp++;
          break;

        case G_DATA_STREAM_NEWLINE_TYPE_CR_LF:
          inbufp += 2;
          break;

        case G_DATA_STREAM_NEWLINE_TYPE_ANY:
          if (inbuf_end - inbufp > 1 && inbufp[0] == '\r' && inbufp[1] == '\n')
            inbufp += 2;
          else
            inbufp++;
          break;

        default:
          g_assert_not_reached ();
          break;
        }
    }

done:
  /* No input consumed. An empty input at the end of the stream is not an
   * error: there is simply nothing left to do, so fall through and report
   * G_CONVERTER_FINISHED with zero bytes read and written.
   *
   * NOTE(review): this branch is also taken when the output buffer is the
   * limiting factor (outbuf_size of 0, or a CR LF line break that does not
   * fit). The GConverter documentation describes G_IO_ERROR_NO_SPACE for
   * that; the error code is kept because existing callers retry on
   * G_IO_ERROR_PARTIAL_INPUT. */
  if (inbufp == inbuf &&
      !(flags & G_CONVERTER_FLUSH) &&
      !((flags & G_CONVERTER_INPUT_AT_END) && inbuf_size == 0))
    {
      g_assert (outbufp == outbuf);
      g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
                           _("Not enough input"));
      return G_CONVERTER_ERROR;
    }

  if ((flags & G_CONVERTER_INPUT_AT_END) &&
      inbufp == inbuf_end)
    ret = G_CONVERTER_FINISHED;
  else if (flags & G_CONVERTER_FLUSH)
    ret = G_CONVERTER_FLUSHED;
  else
    ret = G_CONVERTER_CONVERTED;

  *bytes_read = inbufp - (const char *) inbuf;
  *bytes_written = outbufp - (char *) outbuf;

  return ret;
}
/* GConverter::reset implementation. The instance only stores the two
 * configured newline styles and no conversion state, so resetting is a
 * no-op. */
static void
g_newline_converter_reset (GConverter *converter)
{
/* nothing to do here */
}
/* Fills in the GConverter interface vtable with this type's
 * reset and convert implementations. */
static void
g_newline_converter_iface_init (GConverterIface *iface)
{
  iface->reset = g_newline_converter_reset;
  iface->convert = g_newline_converter_convert;
}
/* GObject::set_property implementation: stores the enum value of
 * "from-newline" or "to-newline" in the instance and warns about any
 * other property id. */
static void
g_newline_converter_set_property (GObject      *object,
                                  guint         prop_id,
                                  const GValue *value,
                                  GParamSpec   *pspec)
{
  GNewlineConverter *self = G_NEWLINE_CONVERTER (object);

  if (prop_id == PROP_FROM_NEWLINE)
    {
      self->from = g_value_get_enum (value);
    }
  else if (prop_id == PROP_TO_NEWLINE)
    {
      self->to = g_value_get_enum (value);
    }
  else
    {
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
    }
}
/* GObject::get_property implementation: copies the stored "from-newline"
 * or "to-newline" enum value into @value and warns about any other
 * property id. */
static void
g_newline_converter_get_property (GObject    *object,
                                  guint       prop_id,
                                  GValue     *value,
                                  GParamSpec *pspec)
{
  GNewlineConverter *self = G_NEWLINE_CONVERTER (object);

  if (prop_id == PROP_FROM_NEWLINE)
    {
      g_value_set_enum (value, self->from);
    }
  else if (prop_id == PROP_TO_NEWLINE)
    {
      g_value_set_enum (value, self->to);
    }
  else
    {
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
    }
}
/* Class initializer: hooks up the property accessors and installs the
 * "to-newline" and "from-newline" enum properties, both defaulting to LF. */
static void
g_newline_converter_class_init (GNewlineConverterClass *klass)
{
  const GParamFlags prop_flags = G_PARAM_READWRITE|G_PARAM_STATIC_NAME|G_PARAM_STATIC_BLURB;
  GObjectClass *object_class = G_OBJECT_CLASS (klass);
  GParamSpec *pspec;

  object_class->get_property = g_newline_converter_get_property;
  object_class->set_property = g_newline_converter_set_property;

  pspec = g_param_spec_enum ("to-newline",
                             P_("To Newline"),
                             P_("The newline type to convert to"),
                             G_TYPE_DATA_STREAM_NEWLINE_TYPE,
                             G_DATA_STREAM_NEWLINE_TYPE_LF,
                             prop_flags);
  g_object_class_install_property (object_class, PROP_TO_NEWLINE, pspec);

  pspec = g_param_spec_enum ("from-newline",
                             P_("From Newline"),
                             P_("The newline type to convert from"),
                             G_TYPE_DATA_STREAM_NEWLINE_TYPE,
                             G_DATA_STREAM_NEWLINE_TYPE_LF,
                             prop_flags);
  g_object_class_install_property (object_class, PROP_FROM_NEWLINE, pspec);
}
/* Instance initializer. No per-instance setup is performed here; the
 * newline styles are assigned through the object properties. */
static void
g_newline_converter_init (GNewlineConverter *local)
{
}
/**
 * g_newline_converter_new:
 * @to_newline: the line-ending style to produce
 * @from_newline: the line-ending style to look for in the input
 *
 * Creates a new #GNewlineConverter that rewrites @from_newline line
 * endings as @to_newline line endings. Note that the destination style
 * is the first argument.
 *
 * Returns: a new #GNewlineConverter
 **/
GNewlineConverter *
g_newline_converter_new (GDataStreamNewlineType to_newline,
                         GDataStreamNewlineType from_newline)
{
  return g_object_new (G_TYPE_NEWLINE_CONVERTER,
                       "to-newline", to_newline,
                       "from-newline", from_newline,
                       NULL);
}

60
gdk/gnewlineconverter.h Normal file
View File

@@ -0,0 +1,60 @@
/* GIO - GLib Input, Output and Streaming Library
*
* Copyright (C) 2021 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General
* Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
* Author: Benjamin Otte <otte@gnome.org>
*/
#ifndef __G_NEWLINE_CONVERTER_H__
#define __G_NEWLINE_CONVERTER_H__
#if 0
#if !defined (__GIO_GIO_H_INSIDE__) && !defined (GIO_COMPILATION)
#error "Only <gio/gio.h> can be included directly."
#endif
#include <gio/gconverter.h>
#else
#include <gio/gio.h>
#endif
G_BEGIN_DECLS
/* Standard GObject type macros for GNewlineConverter: the type id, checked
 * instance/class casts, instance/class type checks and class lookup. */
#define G_TYPE_NEWLINE_CONVERTER (g_newline_converter_get_type ())
#define G_NEWLINE_CONVERTER(o) (G_TYPE_CHECK_INSTANCE_CAST ((o), G_TYPE_NEWLINE_CONVERTER, GNewlineConverter))
#define G_NEWLINE_CONVERTER_CLASS(k) (G_TYPE_CHECK_CLASS_CAST((k), G_TYPE_NEWLINE_CONVERTER, GNewlineConverterClass))
#define G_IS_NEWLINE_CONVERTER(o) (G_TYPE_CHECK_INSTANCE_TYPE ((o), G_TYPE_NEWLINE_CONVERTER))
#define G_IS_NEWLINE_CONVERTER_CLASS(k) (G_TYPE_CHECK_CLASS_TYPE ((k), G_TYPE_NEWLINE_CONVERTER))
#define G_NEWLINE_CONVERTER_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o), G_TYPE_NEWLINE_CONVERTER, GNewlineConverterClass))
/* The instance structure is only declared here; its definition is not part
 * of this header. */
typedef struct _GNewlineConverter GNewlineConverter;
typedef struct _GNewlineConverterClass GNewlineConverterClass;
/* Class structure: adds no virtual functions to GObjectClass. */
struct _GNewlineConverterClass
{
GObjectClass parent_class;
};
/* Returns the GType of GNewlineConverter. */
GLIB_AVAILABLE_IN_ALL
GType g_newline_converter_get_type (void) G_GNUC_CONST;
/* Creates a converter; note that the destination style is the first
 * argument and the source style the second. */
GLIB_AVAILABLE_IN_ALL
GNewlineConverter * g_newline_converter_new (GDataStreamNewlineType to_newline,
GDataStreamNewlineType from_newline);
G_END_DECLS
#endif /* __G_NEWLINE_CONVERTER_H__ */

View File

@@ -50,6 +50,7 @@ gdk_public_sources = files([
'gdktoplevelsize.c',
'gdktoplevel.c',
'gdkdragsurface.c',
'gnewlineconverter.c',
])
gdk_public_headers = files([
@@ -110,6 +111,7 @@ gdk_private_h_sources = files([
'gdkmonitorprivate.h',
'gdkseatdefaultprivate.h',
'gdktoplevelsizeprivate.h',
'gnewlineconverter.h'
])
gdk_gresource_xml = configure_file(output: 'gdk.gresource.xml',

View File

@@ -113,11 +113,12 @@ internal_tests = [
'../testutils.c'
],
},
{ 'name': 'imcontext' },
{ 'name': 'constraint-solver' },
{ 'name': 'rbtree-crash' },
{ 'name': 'imcontext' },
{ 'name': 'newlineconverter' },
{ 'name': 'propertylookuplistmodel' },
{ 'name': 'rbtree' },
{ 'name': 'rbtree-crash' },
{ 'name': 'timsort' },
{ 'name': 'texthistory' },
{ 'name': 'fnmatch' },

View File

@@ -0,0 +1,265 @@
/*
* Copyright © 2021 Benjamin Otte
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Authors: Benjamin Otte <otte@gnome.org>
*/
#include <locale.h>
#include <gdk/gnewlineconverter.h>
/* Number of random rounds each test function runs. */
#define N 100
/* Size of the output scratch buffer in convert() and exclusive upper bound
 * for the randomly chosen input/output chunk sizes. */
#define MAX_SIZE 20
/* Word pool for the random text; the empty string is listed twice, so
 * consecutive line breaks show up regularly. */
static const char *words[] = { "", "", "lorem", "ipsum", "dolor", "sit", "amet",
"consectetur", "adipisci", "elit", "sed", "eiusmod", "tempor", "incidunt",
"labore", "et", "dolore", "magna", "aliqua", "ut", "enim", "ad", "minim",
"veniam", "quis", "nostrud", "exercitation", "ullamco", "laboris", "nisi",
"ut", "aliquid", "ex", "ea", "commodi", "consequat" };
/* Separators appended after each word: nothing, CR, LF or CR LF. */
static const char *breaks[] = { "", "\r", "\n", "\r\n" };
/*
 * generate_random_text:
 * @fuzz: whether to overwrite randomly chosen bytes with random values
 *
 * Builds a random buffer from up to 99 entries of the words table, each
 * followed by a random entry of the breaks table. With @fuzz set and a
 * non-empty result, 100 randomly chosen positions are overwritten with
 * arbitrary byte values, so the data may contain NUL bytes and stray
 * line-break characters.
 *
 * The random numbers are drawn in a fixed order so that a given test seed
 * produces the same data with every compiler.
 *
 * Returns: a new #GBytes owned by the caller
 */
static GBytes *
generate_random_text (gboolean fuzz)
{
  GByteArray *array;
  guint i, n;

  array = g_byte_array_new ();

  n = g_test_rand_int_range (0, 100);
  for (i = 0; i < n; i++)
    {
      const char *word;

      word = words[g_test_rand_int_range (0, G_N_ELEMENTS (words))];
      g_byte_array_append (array, (const guchar *) word, strlen (word));
      word = breaks[g_test_rand_int_range (0, G_N_ELEMENTS (breaks))];
      g_byte_array_append (array, (const guchar *) word, strlen (word));
    }

  if (fuzz && array->len > 0)
    {
      for (i = 0; i < 100; i++)
        {
          gint32 pos, value;

          /* draw position and value in separate statements: inside a single
           * assignment the order of the two calls would be unspecified */
          pos = g_test_rand_int_range (0, array->len);
          /* the upper bound is exclusive, 256 covers every byte value */
          value = g_test_rand_int_range (0, 256);
          array->data[pos] = value;
        }
    }

  return g_byte_array_free_to_bytes (array);
}
/*
 * convert:
 * @input: the data to convert
 * @to_newline: line-ending style to produce
 * @from_newline: line-ending style to look for in @input
 *
 * Runs @input through a fresh #GNewlineConverter, feeding it in randomly
 * sized chunks (1 to MAX_SIZE - 1 bytes) with a randomly sized output
 * window, and checks the converter's return values along the way.
 * %G_CONVERTER_INPUT_AT_END is passed whenever the chunk reaches the end of
 * @input. A %G_IO_ERROR_PARTIAL_INPUT error must not report any progress
 * and is retried with newly chosen sizes.
 *
 * Returns: a new #GBytes with the converted data, owned by the caller
 */
static GBytes *
convert (GBytes                 *input,
         GDataStreamNewlineType  to_newline,
         GDataStreamNewlineType  from_newline)
{
  GConverter *converter;
  GByteArray *output;
  const guchar *inbuf, *inbuf_end;
  gsize inbuf_size;

  output = g_byte_array_new ();
  converter = G_CONVERTER (g_newline_converter_new (to_newline, from_newline));

  inbuf = g_bytes_get_data (input, &inbuf_size);
  inbuf_end = inbuf + inbuf_size;

  while (inbuf < inbuf_end)
    {
      gsize in_size, out_size, bytes_read, bytes_written;
      guchar outbuf[MAX_SIZE];
      GConverterResult res;
      GError *error = NULL;

      in_size = g_test_rand_int_range (1, MAX_SIZE);
      in_size = MIN (in_size, inbuf_end - inbuf);
      out_size = g_test_rand_int_range (1, MAX_SIZE);

      res = g_converter_convert (converter,
                                 inbuf,
                                 in_size,
                                 outbuf,
                                 out_size,
                                 inbuf + in_size == inbuf_end ? G_CONVERTER_INPUT_AT_END : 0,
                                 &bytes_read,
                                 &bytes_written,
                                 &error);

      switch (res)
        {
        case G_CONVERTER_ERROR:
          g_assert_nonnull (error);

          if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT))
            {
              g_assert (bytes_read == 0);
              g_assert (bytes_written == 0);
              g_clear_error (&error);
              /* retry at the same position with new random sizes */
              continue;
            }

          /* There should never be any other error, but this check is more informative
           * than assert_not_reached () */
          g_assert_no_error (error);
          break;

        case G_CONVERTER_CONVERTED:
        case G_CONVERTER_FINISHED:
          g_assert_no_error (error);
          g_assert (bytes_read > 0);
          g_assert (bytes_written > 0);
          g_assert (bytes_read <= in_size);
          g_assert (bytes_written <= out_size);

          inbuf += bytes_read;
          g_byte_array_append (output, outbuf, bytes_written);

          if (res == G_CONVERTER_FINISHED)
            g_assert (inbuf == inbuf_end);
          else
            g_assert (inbuf < inbuf_end);
          break;

        case G_CONVERTER_FLUSHED:
          /* we don't pass FLUSH, so it should never happen */
          g_assert_not_reached ();
          break;

        default:
          g_assert_not_reached ();
          break;
        }
    }

  /* the converter was created above and is not needed any longer */
  g_object_unref (converter);

  return g_byte_array_free_to_bytes (output);
}
/* Asserts that the two GBytes arguments hold byte-for-byte identical data
 * of the same length. */
#define assert_bytes_equal(one, two) G_STMT_START{\
const char *one_data, *two_data; \
gsize one_size, two_size; \
one_data = g_bytes_get_data (one, &one_size); \
two_data = g_bytes_get_data (two, &two_size); \
g_assert_cmpmem (one_data, one_size, two_data, two_size); \
} G_STMT_END
/*
 * assert_bytes_equal_text:
 * @one: a #GBytes
 * @two: a #GBytes
 *
 * Asserts that both buffers contain the same sequence of non-empty words
 * once they are split at every '\r' and '\n', i.e. that they only differ in
 * their line breaks. Empty tokens, which merely reflect adjacent line-break
 * characters, are ignored on both sides.
 *
 * The data is treated as text: anything after an embedded NUL byte is not
 * compared.
 */
#define assert_bytes_equal_text(one, two) G_STMT_START{\
  const char *one_data, *two_data; \
  gsize one_size, two_size; \
  char *one_text, *two_text; \
  char **a, **b; \
  gsize ai, bi; \
  one_data = g_bytes_get_data (one, &one_size); \
  two_data = g_bytes_get_data (two, &two_size); \
  /* GBytes data is not guaranteed to be NUL-terminated: split a copy */ \
  one_text = g_strndup (one_data ? one_data : "", one_size); \
  two_text = g_strndup (two_data ? two_data : "", two_size); \
  a = g_strsplit_set (one_text, "\r\n", -1); \
  b = g_strsplit_set (two_text, "\r\n", -1); \
  ai = bi = 0; \
  for (;;) \
    { \
      /* advance both sides to their next non-empty token */ \
      while (a[ai] && *a[ai] == 0) \
        ai++; \
      while (b[bi] && *b[bi] == 0) \
        bi++; \
      if (a[ai] == NULL || b[bi] == NULL) \
        break; \
      g_assert_cmpstr (a[ai], ==, b[bi]); \
      ai++; \
      bi++; \
    } \
  /* both sides have to run out of words at the same time */ \
  g_assert_null (a[ai]); \
  g_assert_null (b[bi]); \
  g_strfreev (b); \
  g_strfreev (a); \
  g_free (two_text); \
  g_free (one_text); \
} G_STMT_END
/* Checks that converting random fuzzed data directly to a target style
 * gives the same bytes as converting it to a random intermediate style
 * first and from that intermediate style to the target afterwards. */
static void
test_intermediate (void)
{
  gsize round;

  for (round = 0; round < N; round++)
    {
      GDataStreamNewlineType target, intermediate;
      GBytes *input, *direct, *staged, *two_step;

      /* not including any here */
      target = g_test_rand_int_range (0, 3);
      /* can include any */
      intermediate = g_test_rand_int_range (0, 4);
      input = generate_random_text (TRUE);

      direct = convert (input, target, G_DATA_STREAM_NEWLINE_TYPE_ANY);
      staged = convert (input, intermediate, G_DATA_STREAM_NEWLINE_TYPE_ANY);
      two_step = convert (staged, target, intermediate);

      assert_bytes_equal (direct, two_step);

      g_bytes_unref (staged);
      g_bytes_unref (two_step);
      g_bytes_unref (direct);
      g_bytes_unref (input);
    }
}
/* Checks round trips: random text is first normalized so that it uses only
 * one single-character line break (CR or LF). Converting that to a target
 * style and back must reproduce the normalized text, and converting from
 * the known start style must match converting from ANY. */
static void
test_conversion_and_back (void)
{
  gsize round;

  for (round = 0; round < N; round++)
    {
      GDataStreamNewlineType start, other, target;
      GBytes *raw, *input, *forward, *back, *forward_any;

      if (g_test_rand_bit ())
        start = G_DATA_STREAM_NEWLINE_TYPE_CR;
      else
        start = G_DATA_STREAM_NEWLINE_TYPE_LF;
      /* not including any here */
      target = g_test_rand_int_range (0, 3);
      raw = generate_random_text (g_test_rand_bit ());

      /* convert either all CR => LF or all LF => CR */
      if (start == G_DATA_STREAM_NEWLINE_TYPE_LF)
        other = G_DATA_STREAM_NEWLINE_TYPE_CR;
      else
        other = G_DATA_STREAM_NEWLINE_TYPE_LF;
      input = convert (raw, start, other);
      g_bytes_unref (raw);

      forward = convert (input, target, start);
      back = convert (forward, start, target);
      forward_any = convert (input, target, G_DATA_STREAM_NEWLINE_TYPE_ANY);

      assert_bytes_equal (forward, forward_any);
      assert_bytes_equal (input, back);

      g_bytes_unref (forward_any);
      g_bytes_unref (back);
      g_bytes_unref (forward);
      g_bytes_unref (input);
    }
}
static void
test_simple (void)
{
GBytes *input, *output;
gsize i;
for (i = 0; i < N; i++)
{
input = generate_random_text (FALSE);
output = convert (input, g_test_rand_int_range (0, 4), g_test_rand_int_range (0, 4));
assert_bytes_equal_text (input, output);
g_bytes_unref (output);
g_bytes_unref (input);
}
}
/* Test entry point: initializes the GLib test framework, switches to the
 * "C" locale, registers the three newline converter tests and runs them. */
int
main (int argc, char *argv[])
{
/* The parentheses make sure the function itself is called, even if a
 * function-like macro with the same name is defined. */
(g_test_init) (&argc, &argv, NULL);
setlocale (LC_ALL, "C");
g_test_add_func ("/newlineconverter/simple", test_simple);
g_test_add_func ("/newlineconverter/intermediate", test_intermediate);
g_test_add_func ("/newlineconverter/conversion_and_back", test_conversion_and_back);
return g_test_run ();
}