aboutsummaryrefslogtreecommitdiff
path: root/src/regex
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-09-23 16:24:28 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-09-23 16:24:28 +0000
commit1d0894d30228c49d51e47a54de25ce1a58d83fa3 (patch)
treeed580db1dab021535bcb4fd2228a0771aa32c8d2 /src/regex
parent90b02140f35242dda4848d461aaf19fecdcb74df (diff)
downloadgnunet-1d0894d30228c49d51e47a54de25ce1a58d83fa3.tar.gz
gnunet-1d0894d30228c49d51e47a54de25ce1a58d83fa3.zip
DFA path compression
Diffstat (limited to 'src/regex')
-rw-r--r--src/regex/regex.c357
-rw-r--r--src/regex/regex_internal.h7
2 files changed, 241 insertions, 123 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index a5d84ce95..580e9a65f 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -562,127 +562,6 @@ struct GNUNET_REGEX_Strided_Context
562 562
563 563
564/** 564/**
565 * Recursive helper function to add strides to a DFA.
566 *
567 * @param cls context, contains stride length and strided transitions DLL.
568 * @param depth current depth of the depth-first traversal of the graph.
569 * @param label current label, string that contains all labels on the path from
570 * 'start' to 's'.
571 * @param start start state for the depth-first traversal of the graph.
572 * @param s current state in the depth-first traversal
573 */
574void
575add_multi_strides_to_dfa_helper (void *cls, const unsigned int depth,
576 char *label, struct GNUNET_REGEX_State *start,
577 struct GNUNET_REGEX_State *s)
578{
579 struct GNUNET_REGEX_Strided_Context *ctx = cls;
580 struct GNUNET_REGEX_Transition *t;
581 char *new_label;
582
583 if (depth == ctx->stride)
584 {
585 t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
586 t->label = GNUNET_strdup (label);
587 t->to_state = s;
588 t->from_state = start;
589 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, ctx->transitions_tail,
590 t);
591 }
592 else
593 {
594 for (t = s->transitions_head; NULL != t; t = t->next)
595 {
596 /* Do not consider self-loops, because it end's up in too many
597 * transitions */
598 if (t->to_state == t->from_state)
599 continue;
600
601 if (NULL != label)
602 {
603 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
604 }
605 else
606 new_label = GNUNET_strdup (t->label);
607
608 add_multi_strides_to_dfa_helper (cls, (depth + 1), new_label, start,
609 t->to_state);
610 }
611 }
612 GNUNET_free_non_null (label);
613}
614
615
616/**
617 * Function called for each state in the DFA. Starts a traversal of depth set in
618 * context starting from state 's'.
619 *
620 * @param cls context.
621 * @param count not used.
622 * @param s current state.
623 */
624void
625add_multi_strides_to_dfa (void *cls, const unsigned int count,
626 struct GNUNET_REGEX_State *s)
627{
628 add_multi_strides_to_dfa_helper (cls, 0, NULL, s, s);
629}
630
631
632/**
633 * Adds multi-strided transitions to the given 'dfa'.
634 *
635 * @param regex_ctx regex context needed to add transitions to the automaton.
636 * @param dfa DFA to which the multi strided transitions should be added.
637 * @param stride_len length of the strides.
638 */
639void
640GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx,
641 struct GNUNET_REGEX_Automaton *dfa,
642 const unsigned int stride_len)
643{
644 struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL };
645 struct GNUNET_REGEX_State *s;
646 struct GNUNET_REGEX_State *s_next;
647 struct GNUNET_REGEX_Transition *t;
648 struct GNUNET_REGEX_Transition *t_next;
649
650 if (1 > stride_len || GNUNET_YES == dfa->is_multistrided)
651 return;
652
653 // Unmark all states
654 for (s = dfa->states_head; NULL != s; s = s->next)
655 s->marked = GNUNET_NO;
656
657 // Compute the new transitions of given stride_len
658 GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL,
659 &add_multi_strides_to_dfa, &ctx);
660
661 // Add all the new transitions to the automaton.
662 for (t = ctx.transitions_head; NULL != t; t = t_next)
663 {
664 t_next = t->next;
665 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
666 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
667 GNUNET_free_non_null (t->label);
668 GNUNET_free (t);
669 }
670
671 // Remove marked states (including their incoming and outgoing transitions)
672 for (s = dfa->states_head; NULL != s; s = s_next)
673 {
674 s_next = s->next;
675 if (GNUNET_YES == s->marked)
676 automaton_remove_state (dfa, s);
677 }
678
679 // Mark this automaton as multistrided
680 dfa->is_multistrided = GNUNET_YES;
681}
682
683
684
685/**
686 * Check if the given string 'str' needs parentheses around it when 565 * Check if the given string 'str' needs parentheses around it when
687 * using it to generate a regex. 566 * using it to generate a regex.
688 * 567 *
@@ -1689,6 +1568,237 @@ dfa_minimize (struct GNUNET_REGEX_Context *ctx,
1689 1568
1690 1569
1691/** 1570/**
1571 * Recursive helper function to add strides to a DFA.
1572 *
1573 * @param cls context, contains stride length and strided transitions DLL.
1574 * @param depth current depth of the depth-first traversal of the graph.
1575 * @param label current label, string that contains all labels on the path from
1576 * 'start' to 's'.
1577 * @param start start state for the depth-first traversal of the graph.
1578 * @param s current state in the depth-first traversal
1579 */
1580void
1581dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label,
1582 struct GNUNET_REGEX_State *start,
1583 struct GNUNET_REGEX_State *s)
1584{
1585 struct GNUNET_REGEX_Strided_Context *ctx = cls;
1586 struct GNUNET_REGEX_Transition *t;
1587 char *new_label;
1588
1589 if (depth == ctx->stride)
1590 {
1591 t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
1592 t->label = GNUNET_strdup (label);
1593 t->to_state = s;
1594 t->from_state = start;
1595 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, ctx->transitions_tail,
1596 t);
1597 }
1598 else
1599 {
1600 for (t = s->transitions_head; NULL != t; t = t->next)
1601 {
1602 /* Do not consider self-loops, because it end's up in too many
1603 * transitions */
1604 if (t->to_state == t->from_state)
1605 continue;
1606
1607 if (NULL != label)
1608 {
1609 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
1610 }
1611 else
1612 new_label = GNUNET_strdup (t->label);
1613
1614 dfa_add_multi_strides_helper (cls, (depth + 1), new_label, start,
1615 t->to_state);
1616 }
1617 }
1618 GNUNET_free_non_null (label);
1619}
1620
1621
1622/**
1623 * Function called for each state in the DFA. Starts a traversal of depth set in
1624 * context starting from state 's'.
1625 *
1626 * @param cls context.
1627 * @param count not used.
1628 * @param s current state.
1629 */
1630void
1631dfa_add_multi_strides (void *cls, const unsigned int count,
1632 struct GNUNET_REGEX_State *s)
1633{
1634 dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
1635}
1636
1637
1638/**
1639 * Adds multi-strided transitions to the given 'dfa'.
1640 *
1641 * @param regex_ctx regex context needed to add transitions to the automaton.
1642 * @param dfa DFA to which the multi strided transitions should be added.
1643 * @param stride_len length of the strides.
1644 */
1645void
1646GNUNET_REGEX_dfa_add_multi_strides (struct GNUNET_REGEX_Context *regex_ctx,
1647 struct GNUNET_REGEX_Automaton *dfa,
1648 const unsigned int stride_len)
1649{
1650 struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL };
1651 struct GNUNET_REGEX_Transition *t;
1652 struct GNUNET_REGEX_Transition *t_next;
1653
1654 if (1 > stride_len || GNUNET_YES == dfa->is_multistrided)
1655 return;
1656
1657 // Compute the new transitions of given stride_len
1658 GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL,
1659 &dfa_add_multi_strides, &ctx);
1660
1661 // Add all the new transitions to the automaton.
1662 for (t = ctx.transitions_head; NULL != t; t = t_next)
1663 {
1664 t_next = t->next;
1665 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
1666 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
1667 GNUNET_free_non_null (t->label);
1668 GNUNET_free (t);
1669 }
1670
1671 // Mark this automaton as multistrided
1672 dfa->is_multistrided = GNUNET_YES;
1673}
1674
1675/**
1676 * Recursive Helper function for DFA path compression. Does DFS on the DFA graph
1677 * and adds new transitions to the given transitions DLL and marks states that
1678 * should be removed by setting state->contained to GNUNET_YES.
1679 *
1680 * @param start starting state for linear path search.
1681 * @param cur current state in the recursive DFS.
1682 * @param label current label (string of traversed labels).
1683 * @param transitions_head transitions DLL.
1684 * @param transitions_tail transitions DLL.
1685 */
1686void
1687dfa_compress_paths_helper (struct GNUNET_REGEX_State *start,
1688 struct GNUNET_REGEX_State *cur, char *label,
1689 struct GNUNET_REGEX_Transition **transitions_head,
1690 struct GNUNET_REGEX_Transition **transitions_tail)
1691{
1692 struct GNUNET_REGEX_Transition *t;
1693 char *new_label;
1694
1695
1696 if (NULL != label &&
1697 (cur->incoming_transition_count > 1 || GNUNET_YES == cur->accepting ||
1698 cur->transition_count > 1 || GNUNET_YES == cur->marked))
1699 {
1700 t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
1701 t->label = GNUNET_strdup (label);
1702 t->to_state = cur;
1703 t->from_state = start;
1704 GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
1705
1706 GNUNET_free_non_null (label);
1707
1708 if (GNUNET_NO == cur->marked)
1709 {
1710 dfa_compress_paths_helper (cur, cur, NULL, transitions_head,
1711 transitions_tail);
1712 }
1713 return;
1714 }
1715 else if (cur != start)
1716 cur->contained = GNUNET_YES;
1717
1718 if (GNUNET_YES == cur->marked && cur != start)
1719 return;
1720
1721 cur->marked = GNUNET_YES;
1722
1723
1724 for (t = cur->transitions_head; NULL != t; t = t->next)
1725 {
1726 if (NULL != label)
1727 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
1728 else
1729 new_label = GNUNET_strdup (t->label);
1730
1731 if (t->to_state != cur)
1732 {
1733 dfa_compress_paths_helper (start, t->to_state, new_label,
1734 transitions_head, transitions_tail);
1735 }
1736 }
1737}
1738
1739/**
1740 * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be
1741 * compressed to 0->3 by combining transitions.
1742 *
1743 * @param regex_ctx context for adding new transitions.
1744 * @param dfa DFA representation, will directly modify the given DFA.
1745 */
1746static void
1747dfa_compress_paths (struct GNUNET_REGEX_Context *regex_ctx,
1748 struct GNUNET_REGEX_Automaton *dfa)
1749{
1750 struct GNUNET_REGEX_State *s;
1751 struct GNUNET_REGEX_State *s_next;
1752 struct GNUNET_REGEX_Transition *t;
1753 struct GNUNET_REGEX_Transition *t_next;
1754 struct GNUNET_REGEX_Transition *transitions_head = NULL;
1755 struct GNUNET_REGEX_Transition *transitions_tail = NULL;
1756
1757 if (NULL == dfa)
1758 return;
1759
1760 // Count the incoming transitions on each state.
1761 for (s = dfa->states_head; NULL != s; s = s->next)
1762 {
1763 for (t = s->transitions_head; NULL != t; t = t->next)
1764 {
1765 if (NULL != t->to_state)
1766 t->to_state->incoming_transition_count++;
1767 }
1768 }
1769
1770 // Unmark all states.
1771 for (s = dfa->states_head; NULL != s; s = s->next)
1772 {
1773 s->marked = GNUNET_NO;
1774 s->contained = GNUNET_NO;
1775 }
1776
1777 // Add strides and mark states that can be deleted.
1778 dfa_compress_paths_helper (dfa->start, dfa->start, NULL, &transitions_head,
1779 &transitions_tail);
1780
1781 // Add all the new transitions to the automaton.
1782 for (t = transitions_head; NULL != t; t = t_next)
1783 {
1784 t_next = t->next;
1785 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
1786 GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
1787 GNUNET_free_non_null (t->label);
1788 GNUNET_free (t);
1789 }
1790
1791 // Remove marked states (including their incoming and outgoing transitions).
1792 for (s = dfa->states_head; NULL != s; s = s_next)
1793 {
1794 s_next = s->next;
1795 if (GNUNET_YES == s->contained)
1796 automaton_remove_state (dfa, s);
1797 }
1798}
1799
1800
1801/**
1692 * Creates a new NFA fragment. Needs to be cleared using 1802 * Creates a new NFA fragment. Needs to be cleared using
1693 * automaton_fragment_clear. 1803 * automaton_fragment_clear.
1694 * 1804 *
@@ -2422,11 +2532,14 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
2422 // Minimize DFA 2532 // Minimize DFA
2423 dfa_minimize (&ctx, dfa); 2533 dfa_minimize (&ctx, dfa);
2424 2534
2535 // Compress DFA paths
2536 dfa_compress_paths (&ctx, dfa);
2537
2425 // Create proofs for all states 2538 // Create proofs for all states
2426 automaton_create_proofs (dfa); 2539 automaton_create_proofs (dfa);
2427 2540
2428 // Add strides to DFA 2541 // Add strides to DFA
2429 // GNUNET_REGEX_add_multi_strides_to_dfa (&ctx, dfa, 2); 2542 //GNUNET_REGEX_dfa_add_multi_strides (&ctx, dfa, 2);
2430 2543
2431 return dfa; 2544 return dfa;
2432} 2545}
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h
index 8b576a90e..7ab51ba69 100644
--- a/src/regex/regex_internal.h
+++ b/src/regex/regex_internal.h
@@ -120,7 +120,7 @@ struct GNUNET_REGEX_State
120 120
121 /** 121 /**
122 * Marking the state as contained. This is used for checking, if the state is 122 * Marking the state as contained. This is used for checking, if the state is
123 * contained in a set in constant time 123 * contained in a set in constant time.
124 */ 124 */
125 int contained; 125 int contained;
126 126
@@ -181,6 +181,11 @@ struct GNUNET_REGEX_State
181 struct GNUNET_REGEX_Transition *transitions_tail; 181 struct GNUNET_REGEX_Transition *transitions_tail;
182 182
183 /** 183 /**
184 * Number of incoming transitions. Used for compressing DFA paths.
185 */
186 unsigned int incoming_transition_count;
187
188 /**
184 * Set of states on which this state is based on. Used when creating a DFA out 189 * Set of states on which this state is based on. Used when creating a DFA out
185 * of several NFA states. 190 * of several NFA states.
186 */ 191 */