syndilights/blib-1.1.7/blib/bparser.c

467 lines
13 KiB
C

/* blib - Library of useful things to hack the Blinkenlights
*
* Copyright (C) 2002 The Blinkenlights Crew
* Sven Neumann <sven@gimp.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "config.h"
#include <string.h>
#include <stdlib.h>
#include <glib-object.h>
#include "btypes.h"
#include "bparser.h"
struct _BParser
{
GMarkupParseContext *context;
BParserState state;
BParserState last_state;
gint unknown_depth;
GString *cdata;
gpointer user_data;
BParserStartFunc start_element;
BParserEndFunc end_element;
};
static void parser_start_element (GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError **error);
static void parser_end_element (GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error);
static void parser_text (GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error);
static void parser_start_unknown (BParser *parser);
static void parser_end_unknown (BParser *parser);
static const GMarkupParser markup_parser =
{
parser_start_element,
parser_end_element,
parser_text,
NULL, /* passthrough */
NULL, /* error */
};
/**
* b_parser_new:
* @start_element: the function to call when an element is started
* @end_element: the function to call when an element is closed
* @user_data: data to pass to the functions above
*
* Creates a new #BParser suited to parse XML files. The #BParser
* should later be freed using b_parser_free().
*
* Return value: a newly allocated #BParser
**/
BParser *
b_parser_new (BParserStartFunc start_element,
BParserEndFunc end_element,
gpointer user_data)
{
BParser *parser;
parser = g_new0 (BParser, 1);
parser->context = g_markup_parse_context_new (&markup_parser,
0, parser, NULL);
parser->state = B_PARSER_STATE_TOPLEVEL;
parser->cdata = g_string_new (NULL);
parser->user_data = user_data;
parser->start_element = start_element;
parser->end_element = end_element;
return parser;
}
/**
* b_parser_parse:
* @parser: a #BParser
* @text: pointer to a text buffer to parse
* @text_len: the number of bytes to parse from @text
* @error: location to store the error occuring, or %NULL to ignore errors
*
* Let the @parser process a chunk of @text. You need to call
* b_parser_end_parse() after you passed the last chunk to the @parser.
*
* Return value: %TRUE if parsing was successful, %FALSE if an error occured
**/
gboolean
b_parser_parse (BParser *parser,
const gchar *text,
gssize text_len,
GError **error)
{
g_return_val_if_fail (parser != NULL, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
return g_markup_parse_context_parse (parser->context, text, text_len, error);
}
/**
* b_parser_end_parse:
* @parser: a #BParser
* @error: location to store the error occuring, or %NULL to ignore errors
*
* Finishes the @parser. After calling this function, you must not
* call b_parser_parse() on the parser again.
*
* Return value: %TRUE if @parser was successfully finished, %FALSE
* otherwise
**/
gboolean
b_parser_end_parse (BParser *parser,
GError **error)
{
g_return_val_if_fail (parser != NULL, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
return g_markup_parse_context_end_parse (parser->context, error);
}
/**
* b_parser_parse_io_channel:
* @parser: a #BParser
* @io: a #GIOChannel to read the text to parse from
* @recode: %TRUE if you want the parser to do automatic encoding conversion
* @error: location to store the error occuring, or %NULL to ignore errors
*
* Reads data from the #GIOChannel @io and passes it to @parser. If
* @recode is TRUE, the data should start with an XML header so this
* function can determine the encoding of the XML data and convert it
* to UTF-8 for you.
*
* Return value: %TRUE if parsing was successful, %FALSE otherwise
**/
gboolean
b_parser_parse_io_channel (BParser *parser,
GIOChannel *io,
gboolean recode,
GError **error)
{
GIOStatus status;
guchar buffer[8192];
gsize len = 0;
gsize bytes;
g_return_val_if_fail (parser != NULL, FALSE);
g_return_val_if_fail (io != NULL, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
if (recode)
{
const gchar *io_encoding = g_io_channel_get_encoding (io);
gchar *encoding = NULL;
if (io_encoding && strcmp (io_encoding, "UTF-8"))
{
g_warning ("b_parser_parse_io_channel(): "
"The encoding has already been set on this IOChannel!");
return FALSE;
}
/* try to determine the encoding */
g_io_channel_set_encoding (io, NULL, NULL);
while (len < sizeof (buffer) && !encoding)
{
status = g_io_channel_read_chars (io,
buffer + len, 1, &bytes, error);
len += bytes;
if (status == G_IO_STATUS_ERROR)
return FALSE;
if (status == G_IO_STATUS_EOF)
break;
encoding = b_parse_encoding (buffer, len);
}
if (encoding)
{
if (!g_io_channel_set_encoding (io, encoding, error))
return FALSE;
g_free (encoding);
}
else
{
g_io_channel_set_encoding (io, "UTF-8", NULL);
}
}
while (TRUE)
{
if (!b_parser_parse (parser, buffer, len, error))
return FALSE;
status = g_io_channel_read_chars (io,
buffer, sizeof(buffer), &len, error);
switch (status)
{
case G_IO_STATUS_ERROR:
return FALSE;
case G_IO_STATUS_EOF:
return b_parser_end_parse (parser, error);
case G_IO_STATUS_NORMAL:
case G_IO_STATUS_AGAIN:
break;
}
}
}
/**
* b_parser_free:
* @parser: a #BParser
*
* Frees the resources allocated for @parser. You must not access
* @parser after calling this function.
**/
void
b_parser_free (BParser *parser)
{
g_return_if_fail (parser != NULL);
g_markup_parse_context_free (parser->context);
g_string_free (parser->cdata, TRUE);
g_free (parser);
}
/**
* b_parser_get_state:
* @parser: a #BParser
*
* Retrieves the current state of @parser.
*
* Return value: the state of @parser
**/
BParserState
b_parser_get_state (BParser *parser)
{
g_return_val_if_fail (parser != NULL, B_PARSER_STATE_UNKNOWN);
return parser->state;
}
static void
parser_start_element (GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError **error)
{
BParserState new_state;
BParser *parser = (BParser *) user_data;
switch (parser->state)
{
case B_PARSER_STATE_TOPLEVEL:
default:
if (parser->start_element &&
(new_state = parser->start_element (parser->state,
element_name,
attribute_names,
attribute_values,
parser->user_data,
error)))
{
parser->last_state = parser->state;
parser->state = new_state;
break;
}
/* else fallthru */
case B_PARSER_STATE_UNKNOWN:
parser_start_unknown (parser);
break;
}
g_string_truncate (parser->cdata, 0);
}
static void
parser_end_element (GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error)
{
BParser *parser = (BParser *) user_data;
switch (parser->state)
{
case B_PARSER_STATE_TOPLEVEL:
g_assert_not_reached ();
break;
default:
if (parser->end_element)
{
gint len;
/* strip trailing spaces */
for (len = parser->cdata->len;
len > 0 && g_ascii_isspace (parser->cdata->str[len-1]);
len--)
; /* do nothing */
g_string_truncate (parser->cdata, len);
parser->state = parser->end_element (parser->state,
element_name,
parser->cdata->str,
parser->cdata->len,
parser->user_data,
error);
break;
}
/* else fallthru */
case B_PARSER_STATE_UNKNOWN:
parser_end_unknown (parser);
break;
}
g_string_truncate (parser->cdata, 0);
}
static void
parser_text (GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error)
{
BParser *parser = (BParser *) user_data;
gboolean space;
gint i;
space = (parser->cdata->len == 0 ||
g_ascii_isspace (parser->cdata->str[parser->cdata->len]));
for (i = 0; i < text_len; i++)
{
if (g_ascii_isspace (text[i]))
{
if (space)
continue;
space = TRUE;
}
else
{
space = FALSE;
}
g_string_append_c (parser->cdata, text[i]);
}
}
static void
parser_start_unknown (BParser *parser)
{
if (parser->unknown_depth == 0)
{
parser->last_state = parser->state;
parser->state = B_PARSER_STATE_UNKNOWN;
}
parser->unknown_depth++;
}
static void
parser_end_unknown (BParser *parser)
{
parser->unknown_depth--;
if (parser->unknown_depth == 0)
parser->state = parser->last_state;
}
/**
* b_parse_encoding:
* @text: a string to parse, must be at least 20 bytes
* @text_len: the maximum number of bytes to parse from @text
*
* Scans the @text for an XML header with encoding specification.
*
* Return value: a copy of the encoding string or %NULL if none was
* found
**/
gchar *
b_parse_encoding (const gchar *text,
gint text_len)
{
const gchar *start;
const gchar *end;
gint i;
g_return_val_if_fail (text, NULL);
if (text_len < 20)
return NULL;
start = g_strstr_len (text, text_len, "<?xml");
if (!start)
return NULL;
end = g_strstr_len (start, text_len - (start - text), "?>");
if (!end)
return NULL;
text_len = end - start;
if (text_len < 12)
return NULL;
start = g_strstr_len (start + 1, text_len - 1, "encoding=");
if (!start)
return NULL;
start += 9;
if (*start != '\"' && *start != '\'')
return NULL;
text_len = end - start;
if (text_len < 1)
return NULL;
for (i = 1; i < text_len; i++)
if (start[i] == start[0])
break;
if (i == text_len || i < 3)
return NULL;
return g_strndup (start + 1, i - 1);
}