/* blib - Library of useful things to hack the Blinkenlights * * Copyright (C) 2002 The Blinkenlights Crew * Sven Neumann * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "config.h" #include #include #include #include "btypes.h" #include "bparser.h" struct _BParser { GMarkupParseContext *context; BParserState state; BParserState last_state; gint unknown_depth; GString *cdata; gpointer user_data; BParserStartFunc start_element; BParserEndFunc end_element; }; static void parser_start_element (GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error); static void parser_end_element (GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error); static void parser_text (GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error); static void parser_start_unknown (BParser *parser); static void parser_end_unknown (BParser *parser); static const GMarkupParser markup_parser = { parser_start_element, parser_end_element, parser_text, NULL, /* passthrough */ NULL, /* error */ }; /** * b_parser_new: * @start_element: the function to call when an element is started * @end_element: the function to call when an element is closed * @user_data: data to pass to the functions above * * Creates a new #BParser suited to parse XML files. The #BParser * should later be freed using b_parser_free(). * * Return value: a newly allocated #BParser **/ BParser * b_parser_new (BParserStartFunc start_element, BParserEndFunc end_element, gpointer user_data) { BParser *parser; parser = g_new0 (BParser, 1); parser->context = g_markup_parse_context_new (&markup_parser, 0, parser, NULL); parser->state = B_PARSER_STATE_TOPLEVEL; parser->cdata = g_string_new (NULL); parser->user_data = user_data; parser->start_element = start_element; parser->end_element = end_element; return parser; } /** * b_parser_parse: * @parser: a #BParser * @text: pointer to a text buffer to parse * @text_len: the number of bytes to parse from @text * @error: location to store the error occuring, or %NULL to ignore errors * * Let the @parser process a chunk of @text. You need to call * b_parser_end_parse() after you passed the last chunk to the @parser. * * Return value: %TRUE if parsing was successful, %FALSE if an error occured **/ gboolean b_parser_parse (BParser *parser, const gchar *text, gssize text_len, GError **error) { g_return_val_if_fail (parser != NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); return g_markup_parse_context_parse (parser->context, text, text_len, error); } /** * b_parser_end_parse: * @parser: a #BParser * @error: location to store the error occuring, or %NULL to ignore errors * * Finishes the @parser. After calling this function, you must not * call b_parser_parse() on the parser again. * * Return value: %TRUE if @parser was successfully finished, %FALSE * otherwise **/ gboolean b_parser_end_parse (BParser *parser, GError **error) { g_return_val_if_fail (parser != NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); return g_markup_parse_context_end_parse (parser->context, error); } /** * b_parser_parse_io_channel: * @parser: a #BParser * @io: a #GIOChannel to read the text to parse from * @recode: %TRUE if you want the parser to do automatic encoding conversion * @error: location to store the error occuring, or %NULL to ignore errors * * Reads data from the #GIOChannel @io and passes it to @parser. If * @recode is TRUE, the data should start with an XML header so this * function can determine the encoding of the XML data and convert it * to UTF-8 for you. * * Return value: %TRUE if parsing was successful, %FALSE otherwise **/ gboolean b_parser_parse_io_channel (BParser *parser, GIOChannel *io, gboolean recode, GError **error) { GIOStatus status; guchar buffer[8192]; gsize len = 0; gsize bytes; g_return_val_if_fail (parser != NULL, FALSE); g_return_val_if_fail (io != NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE); if (recode) { const gchar *io_encoding = g_io_channel_get_encoding (io); gchar *encoding = NULL; if (io_encoding && strcmp (io_encoding, "UTF-8")) { g_warning ("b_parser_parse_io_channel(): " "The encoding has already been set on this IOChannel!"); return FALSE; } /* try to determine the encoding */ g_io_channel_set_encoding (io, NULL, NULL); while (len < sizeof (buffer) && !encoding) { status = g_io_channel_read_chars (io, buffer + len, 1, &bytes, error); len += bytes; if (status == G_IO_STATUS_ERROR) return FALSE; if (status == G_IO_STATUS_EOF) break; encoding = b_parse_encoding (buffer, len); } if (encoding) { if (!g_io_channel_set_encoding (io, encoding, error)) return FALSE; g_free (encoding); } else { g_io_channel_set_encoding (io, "UTF-8", NULL); } } while (TRUE) { if (!b_parser_parse (parser, buffer, len, error)) return FALSE; status = g_io_channel_read_chars (io, buffer, sizeof(buffer), &len, error); switch (status) { case G_IO_STATUS_ERROR: return FALSE; case G_IO_STATUS_EOF: return b_parser_end_parse (parser, error); case G_IO_STATUS_NORMAL: case G_IO_STATUS_AGAIN: break; } } } /** * b_parser_free: * @parser: a #BParser * * Frees the resources allocated for @parser. You must not access * @parser after calling this function. **/ void b_parser_free (BParser *parser) { g_return_if_fail (parser != NULL); g_markup_parse_context_free (parser->context); g_string_free (parser->cdata, TRUE); g_free (parser); } /** * b_parser_get_state: * @parser: a #BParser * * Retrieves the current state of @parser. * * Return value: the state of @parser **/ BParserState b_parser_get_state (BParser *parser) { g_return_val_if_fail (parser != NULL, B_PARSER_STATE_UNKNOWN); return parser->state; } static void parser_start_element (GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error) { BParserState new_state; BParser *parser = (BParser *) user_data; switch (parser->state) { case B_PARSER_STATE_TOPLEVEL: default: if (parser->start_element && (new_state = parser->start_element (parser->state, element_name, attribute_names, attribute_values, parser->user_data, error))) { parser->last_state = parser->state; parser->state = new_state; break; } /* else fallthru */ case B_PARSER_STATE_UNKNOWN: parser_start_unknown (parser); break; } g_string_truncate (parser->cdata, 0); } static void parser_end_element (GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error) { BParser *parser = (BParser *) user_data; switch (parser->state) { case B_PARSER_STATE_TOPLEVEL: g_assert_not_reached (); break; default: if (parser->end_element) { gint len; /* strip trailing spaces */ for (len = parser->cdata->len; len > 0 && g_ascii_isspace (parser->cdata->str[len-1]); len--) ; /* do nothing */ g_string_truncate (parser->cdata, len); parser->state = parser->end_element (parser->state, element_name, parser->cdata->str, parser->cdata->len, parser->user_data, error); break; } /* else fallthru */ case B_PARSER_STATE_UNKNOWN: parser_end_unknown (parser); break; } g_string_truncate (parser->cdata, 0); } static void parser_text (GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error) { BParser *parser = (BParser *) user_data; gboolean space; gint i; space = (parser->cdata->len == 0 || g_ascii_isspace (parser->cdata->str[parser->cdata->len])); for (i = 0; i < text_len; i++) { if (g_ascii_isspace (text[i])) { if (space) continue; space = TRUE; } else { space = FALSE; } g_string_append_c (parser->cdata, text[i]); } } static void parser_start_unknown (BParser *parser) { if (parser->unknown_depth == 0) { parser->last_state = parser->state; parser->state = B_PARSER_STATE_UNKNOWN; } parser->unknown_depth++; } static void parser_end_unknown (BParser *parser) { parser->unknown_depth--; if (parser->unknown_depth == 0) parser->state = parser->last_state; } /** * b_parse_encoding: * @text: a string to parse, must be at least 20 bytes * @text_len: the maximum number of bytes to parse from @text * * Scans the @text for an XML header with encoding specification. * * Return value: a copy of the encoding string or %NULL if none was * found **/ gchar * b_parse_encoding (const gchar *text, gint text_len) { const gchar *start; const gchar *end; gint i; g_return_val_if_fail (text, NULL); if (text_len < 20) return NULL; start = g_strstr_len (text, text_len, ""); if (!end) return NULL; text_len = end - start; if (text_len < 12) return NULL; start = g_strstr_len (start + 1, text_len - 1, "encoding="); if (!start) return NULL; start += 9; if (*start != '\"' && *start != '\'') return NULL; text_len = end - start; if (text_len < 1) return NULL; for (i = 1; i < text_len; i++) if (start[i] == start[0]) break; if (i == text_len || i < 3) return NULL; return g_strndup (start + 1, i - 1); }