diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-09-23 16:24:28 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-09-23 16:24:28 +0000 |
commit | 1d0894d30228c49d51e47a54de25ce1a58d83fa3 (patch) | |
tree | ed580db1dab021535bcb4fd2228a0771aa32c8d2 /src/regex | |
parent | 90b02140f35242dda4848d461aaf19fecdcb74df (diff) | |
download | gnunet-1d0894d30228c49d51e47a54de25ce1a58d83fa3.tar.gz gnunet-1d0894d30228c49d51e47a54de25ce1a58d83fa3.zip |
DFA path compression
Diffstat (limited to 'src/regex')
-rw-r--r-- | src/regex/regex.c | 357 | ||||
-rw-r--r-- | src/regex/regex_internal.h | 7 |
2 files changed, 241 insertions, 123 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index a5d84ce95..580e9a65f 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -562,127 +562,6 @@ struct GNUNET_REGEX_Strided_Context | |||
562 | 562 | ||
563 | 563 | ||
564 | /** | 564 | /** |
565 | * Recursive helper function to add strides to a DFA. | ||
566 | * | ||
567 | * @param cls context, contains stride length and strided transitions DLL. | ||
568 | * @param depth current depth of the depth-first traversal of the graph. | ||
569 | * @param label current label, string that contains all labels on the path from | ||
570 | * 'start' to 's'. | ||
571 | * @param start start state for the depth-first traversal of the graph. | ||
572 | * @param s current state in the depth-first traversal | ||
573 | */ | ||
574 | void | ||
575 | add_multi_strides_to_dfa_helper (void *cls, const unsigned int depth, | ||
576 | char *label, struct GNUNET_REGEX_State *start, | ||
577 | struct GNUNET_REGEX_State *s) | ||
578 | { | ||
579 | struct GNUNET_REGEX_Strided_Context *ctx = cls; | ||
580 | struct GNUNET_REGEX_Transition *t; | ||
581 | char *new_label; | ||
582 | |||
583 | if (depth == ctx->stride) | ||
584 | { | ||
585 | t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); | ||
586 | t->label = GNUNET_strdup (label); | ||
587 | t->to_state = s; | ||
588 | t->from_state = start; | ||
589 | GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, ctx->transitions_tail, | ||
590 | t); | ||
591 | } | ||
592 | else | ||
593 | { | ||
594 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
595 | { | ||
596 | /* Do not consider self-loops, because it end's up in too many | ||
597 | * transitions */ | ||
598 | if (t->to_state == t->from_state) | ||
599 | continue; | ||
600 | |||
601 | if (NULL != label) | ||
602 | { | ||
603 | GNUNET_asprintf (&new_label, "%s%s", label, t->label); | ||
604 | } | ||
605 | else | ||
606 | new_label = GNUNET_strdup (t->label); | ||
607 | |||
608 | add_multi_strides_to_dfa_helper (cls, (depth + 1), new_label, start, | ||
609 | t->to_state); | ||
610 | } | ||
611 | } | ||
612 | GNUNET_free_non_null (label); | ||
613 | } | ||
614 | |||
615 | |||
616 | /** | ||
617 | * Function called for each state in the DFA. Starts a traversal of depth set in | ||
618 | * context starting from state 's'. | ||
619 | * | ||
620 | * @param cls context. | ||
621 | * @param count not used. | ||
622 | * @param s current state. | ||
623 | */ | ||
624 | void | ||
625 | add_multi_strides_to_dfa (void *cls, const unsigned int count, | ||
626 | struct GNUNET_REGEX_State *s) | ||
627 | { | ||
628 | add_multi_strides_to_dfa_helper (cls, 0, NULL, s, s); | ||
629 | } | ||
630 | |||
631 | |||
632 | /** | ||
633 | * Adds multi-strided transitions to the given 'dfa'. | ||
634 | * | ||
635 | * @param regex_ctx regex context needed to add transitions to the automaton. | ||
636 | * @param dfa DFA to which the multi strided transitions should be added. | ||
637 | * @param stride_len length of the strides. | ||
638 | */ | ||
639 | void | ||
640 | GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx, | ||
641 | struct GNUNET_REGEX_Automaton *dfa, | ||
642 | const unsigned int stride_len) | ||
643 | { | ||
644 | struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL }; | ||
645 | struct GNUNET_REGEX_State *s; | ||
646 | struct GNUNET_REGEX_State *s_next; | ||
647 | struct GNUNET_REGEX_Transition *t; | ||
648 | struct GNUNET_REGEX_Transition *t_next; | ||
649 | |||
650 | if (1 > stride_len || GNUNET_YES == dfa->is_multistrided) | ||
651 | return; | ||
652 | |||
653 | // Unmark all states | ||
654 | for (s = dfa->states_head; NULL != s; s = s->next) | ||
655 | s->marked = GNUNET_NO; | ||
656 | |||
657 | // Compute the new transitions of given stride_len | ||
658 | GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL, | ||
659 | &add_multi_strides_to_dfa, &ctx); | ||
660 | |||
661 | // Add all the new transitions to the automaton. | ||
662 | for (t = ctx.transitions_head; NULL != t; t = t_next) | ||
663 | { | ||
664 | t_next = t->next; | ||
665 | state_add_transition (regex_ctx, t->from_state, t->label, t->to_state); | ||
666 | GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t); | ||
667 | GNUNET_free_non_null (t->label); | ||
668 | GNUNET_free (t); | ||
669 | } | ||
670 | |||
671 | // Remove marked states (including their incoming and outgoing transitions) | ||
672 | for (s = dfa->states_head; NULL != s; s = s_next) | ||
673 | { | ||
674 | s_next = s->next; | ||
675 | if (GNUNET_YES == s->marked) | ||
676 | automaton_remove_state (dfa, s); | ||
677 | } | ||
678 | |||
679 | // Mark this automaton as multistrided | ||
680 | dfa->is_multistrided = GNUNET_YES; | ||
681 | } | ||
682 | |||
683 | |||
684 | |||
685 | /** | ||
686 | * Check if the given string 'str' needs parentheses around it when | 565 | * Check if the given string 'str' needs parentheses around it when |
687 | * using it to generate a regex. | 566 | * using it to generate a regex. |
688 | * | 567 | * |
@@ -1689,6 +1568,237 @@ dfa_minimize (struct GNUNET_REGEX_Context *ctx, | |||
1689 | 1568 | ||
1690 | 1569 | ||
1691 | /** | 1570 | /** |
1571 | * Recursive helper function to add strides to a DFA. | ||
1572 | * | ||
1573 | * @param cls context, contains stride length and strided transitions DLL. | ||
1574 | * @param depth current depth of the depth-first traversal of the graph. | ||
1575 | * @param label current label, string that contains all labels on the path from | ||
1576 | * 'start' to 's'. | ||
1577 | * @param start start state for the depth-first traversal of the graph. | ||
1578 | * @param s current state in the depth-first traversal | ||
1579 | */ | ||
1580 | void | ||
1581 | dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label, | ||
1582 | struct GNUNET_REGEX_State *start, | ||
1583 | struct GNUNET_REGEX_State *s) | ||
1584 | { | ||
1585 | struct GNUNET_REGEX_Strided_Context *ctx = cls; | ||
1586 | struct GNUNET_REGEX_Transition *t; | ||
1587 | char *new_label; | ||
1588 | |||
1589 | if (depth == ctx->stride) | ||
1590 | { | ||
1591 | t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); | ||
1592 | t->label = GNUNET_strdup (label); | ||
1593 | t->to_state = s; | ||
1594 | t->from_state = start; | ||
1595 | GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, ctx->transitions_tail, | ||
1596 | t); | ||
1597 | } | ||
1598 | else | ||
1599 | { | ||
1600 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
1601 | { | ||
1602 | /* Do not consider self-loops, because it end's up in too many | ||
1603 | * transitions */ | ||
1604 | if (t->to_state == t->from_state) | ||
1605 | continue; | ||
1606 | |||
1607 | if (NULL != label) | ||
1608 | { | ||
1609 | GNUNET_asprintf (&new_label, "%s%s", label, t->label); | ||
1610 | } | ||
1611 | else | ||
1612 | new_label = GNUNET_strdup (t->label); | ||
1613 | |||
1614 | dfa_add_multi_strides_helper (cls, (depth + 1), new_label, start, | ||
1615 | t->to_state); | ||
1616 | } | ||
1617 | } | ||
1618 | GNUNET_free_non_null (label); | ||
1619 | } | ||
1620 | |||
1621 | |||
1622 | /** | ||
1623 | * Function called for each state in the DFA. Starts a traversal of depth set in | ||
1624 | * context starting from state 's'. | ||
1625 | * | ||
1626 | * @param cls context. | ||
1627 | * @param count not used. | ||
1628 | * @param s current state. | ||
1629 | */ | ||
1630 | void | ||
1631 | dfa_add_multi_strides (void *cls, const unsigned int count, | ||
1632 | struct GNUNET_REGEX_State *s) | ||
1633 | { | ||
1634 | dfa_add_multi_strides_helper (cls, 0, NULL, s, s); | ||
1635 | } | ||
1636 | |||
1637 | |||
1638 | /** | ||
1639 | * Adds multi-strided transitions to the given 'dfa'. | ||
1640 | * | ||
1641 | * @param regex_ctx regex context needed to add transitions to the automaton. | ||
1642 | * @param dfa DFA to which the multi strided transitions should be added. | ||
1643 | * @param stride_len length of the strides. | ||
1644 | */ | ||
1645 | void | ||
1646 | GNUNET_REGEX_dfa_add_multi_strides (struct GNUNET_REGEX_Context *regex_ctx, | ||
1647 | struct GNUNET_REGEX_Automaton *dfa, | ||
1648 | const unsigned int stride_len) | ||
1649 | { | ||
1650 | struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL }; | ||
1651 | struct GNUNET_REGEX_Transition *t; | ||
1652 | struct GNUNET_REGEX_Transition *t_next; | ||
1653 | |||
1654 | if (1 > stride_len || GNUNET_YES == dfa->is_multistrided) | ||
1655 | return; | ||
1656 | |||
1657 | // Compute the new transitions of given stride_len | ||
1658 | GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL, | ||
1659 | &dfa_add_multi_strides, &ctx); | ||
1660 | |||
1661 | // Add all the new transitions to the automaton. | ||
1662 | for (t = ctx.transitions_head; NULL != t; t = t_next) | ||
1663 | { | ||
1664 | t_next = t->next; | ||
1665 | state_add_transition (regex_ctx, t->from_state, t->label, t->to_state); | ||
1666 | GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t); | ||
1667 | GNUNET_free_non_null (t->label); | ||
1668 | GNUNET_free (t); | ||
1669 | } | ||
1670 | |||
1671 | // Mark this automaton as multistrided | ||
1672 | dfa->is_multistrided = GNUNET_YES; | ||
1673 | } | ||
1674 | |||
1675 | /** | ||
1676 | * Recursive Helper function for DFA path compression. Does DFS on the DFA graph | ||
1677 | * and adds new transitions to the given transitions DLL and marks states that | ||
1678 | * should be removed by setting state->contained to GNUNET_YES. | ||
1679 | * | ||
1680 | * @param start starting state for linear path search. | ||
1681 | * @param cur current state in the recursive DFS. | ||
1682 | * @param label current label (string of traversed labels). | ||
1683 | * @param transitions_head transitions DLL. | ||
1684 | * @param transitions_tail transitions DLL. | ||
1685 | */ | ||
1686 | void | ||
1687 | dfa_compress_paths_helper (struct GNUNET_REGEX_State *start, | ||
1688 | struct GNUNET_REGEX_State *cur, char *label, | ||
1689 | struct GNUNET_REGEX_Transition **transitions_head, | ||
1690 | struct GNUNET_REGEX_Transition **transitions_tail) | ||
1691 | { | ||
1692 | struct GNUNET_REGEX_Transition *t; | ||
1693 | char *new_label; | ||
1694 | |||
1695 | |||
1696 | if (NULL != label && | ||
1697 | (cur->incoming_transition_count > 1 || GNUNET_YES == cur->accepting || | ||
1698 | cur->transition_count > 1 || GNUNET_YES == cur->marked)) | ||
1699 | { | ||
1700 | t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); | ||
1701 | t->label = GNUNET_strdup (label); | ||
1702 | t->to_state = cur; | ||
1703 | t->from_state = start; | ||
1704 | GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t); | ||
1705 | |||
1706 | GNUNET_free_non_null (label); | ||
1707 | |||
1708 | if (GNUNET_NO == cur->marked) | ||
1709 | { | ||
1710 | dfa_compress_paths_helper (cur, cur, NULL, transitions_head, | ||
1711 | transitions_tail); | ||
1712 | } | ||
1713 | return; | ||
1714 | } | ||
1715 | else if (cur != start) | ||
1716 | cur->contained = GNUNET_YES; | ||
1717 | |||
1718 | if (GNUNET_YES == cur->marked && cur != start) | ||
1719 | return; | ||
1720 | |||
1721 | cur->marked = GNUNET_YES; | ||
1722 | |||
1723 | |||
1724 | for (t = cur->transitions_head; NULL != t; t = t->next) | ||
1725 | { | ||
1726 | if (NULL != label) | ||
1727 | GNUNET_asprintf (&new_label, "%s%s", label, t->label); | ||
1728 | else | ||
1729 | new_label = GNUNET_strdup (t->label); | ||
1730 | |||
1731 | if (t->to_state != cur) | ||
1732 | { | ||
1733 | dfa_compress_paths_helper (start, t->to_state, new_label, | ||
1734 | transitions_head, transitions_tail); | ||
1735 | } | ||
1736 | } | ||
1737 | } | ||
1738 | |||
1739 | /** | ||
1740 | * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be | ||
1741 | * compressed to 0->3 by combining transitions. | ||
1742 | * | ||
1743 | * @param regex_ctx context for adding new transitions. | ||
1744 | * @param dfa DFA representation, will directly modify the given DFA. | ||
1745 | */ | ||
1746 | static void | ||
1747 | dfa_compress_paths (struct GNUNET_REGEX_Context *regex_ctx, | ||
1748 | struct GNUNET_REGEX_Automaton *dfa) | ||
1749 | { | ||
1750 | struct GNUNET_REGEX_State *s; | ||
1751 | struct GNUNET_REGEX_State *s_next; | ||
1752 | struct GNUNET_REGEX_Transition *t; | ||
1753 | struct GNUNET_REGEX_Transition *t_next; | ||
1754 | struct GNUNET_REGEX_Transition *transitions_head = NULL; | ||
1755 | struct GNUNET_REGEX_Transition *transitions_tail = NULL; | ||
1756 | |||
1757 | if (NULL == dfa) | ||
1758 | return; | ||
1759 | |||
1760 | // Count the incoming transitions on each state. | ||
1761 | for (s = dfa->states_head; NULL != s; s = s->next) | ||
1762 | { | ||
1763 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
1764 | { | ||
1765 | if (NULL != t->to_state) | ||
1766 | t->to_state->incoming_transition_count++; | ||
1767 | } | ||
1768 | } | ||
1769 | |||
1770 | // Unmark all states. | ||
1771 | for (s = dfa->states_head; NULL != s; s = s->next) | ||
1772 | { | ||
1773 | s->marked = GNUNET_NO; | ||
1774 | s->contained = GNUNET_NO; | ||
1775 | } | ||
1776 | |||
1777 | // Add strides and mark states that can be deleted. | ||
1778 | dfa_compress_paths_helper (dfa->start, dfa->start, NULL, &transitions_head, | ||
1779 | &transitions_tail); | ||
1780 | |||
1781 | // Add all the new transitions to the automaton. | ||
1782 | for (t = transitions_head; NULL != t; t = t_next) | ||
1783 | { | ||
1784 | t_next = t->next; | ||
1785 | state_add_transition (regex_ctx, t->from_state, t->label, t->to_state); | ||
1786 | GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t); | ||
1787 | GNUNET_free_non_null (t->label); | ||
1788 | GNUNET_free (t); | ||
1789 | } | ||
1790 | |||
1791 | // Remove marked states (including their incoming and outgoing transitions). | ||
1792 | for (s = dfa->states_head; NULL != s; s = s_next) | ||
1793 | { | ||
1794 | s_next = s->next; | ||
1795 | if (GNUNET_YES == s->contained) | ||
1796 | automaton_remove_state (dfa, s); | ||
1797 | } | ||
1798 | } | ||
1799 | |||
1800 | |||
1801 | /** | ||
1692 | * Creates a new NFA fragment. Needs to be cleared using | 1802 | * Creates a new NFA fragment. Needs to be cleared using |
1693 | * automaton_fragment_clear. | 1803 | * automaton_fragment_clear. |
1694 | * | 1804 | * |
@@ -2422,11 +2532,14 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
2422 | // Minimize DFA | 2532 | // Minimize DFA |
2423 | dfa_minimize (&ctx, dfa); | 2533 | dfa_minimize (&ctx, dfa); |
2424 | 2534 | ||
2535 | // Compress DFA paths | ||
2536 | dfa_compress_paths (&ctx, dfa); | ||
2537 | |||
2425 | // Create proofs for all states | 2538 | // Create proofs for all states |
2426 | automaton_create_proofs (dfa); | 2539 | automaton_create_proofs (dfa); |
2427 | 2540 | ||
2428 | // Add strides to DFA | 2541 | // Add strides to DFA |
2429 | // GNUNET_REGEX_add_multi_strides_to_dfa (&ctx, dfa, 2); | 2542 | //GNUNET_REGEX_dfa_add_multi_strides (&ctx, dfa, 2); |
2430 | 2543 | ||
2431 | return dfa; | 2544 | return dfa; |
2432 | } | 2545 | } |
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h index 8b576a90e..7ab51ba69 100644 --- a/src/regex/regex_internal.h +++ b/src/regex/regex_internal.h | |||
@@ -120,7 +120,7 @@ struct GNUNET_REGEX_State | |||
120 | 120 | ||
121 | /** | 121 | /** |
122 | * Marking the state as contained. This is used for checking, if the state is | 122 | * Marking the state as contained. This is used for checking, if the state is |
123 | * contained in a set in constant time | 123 | * contained in a set in constant time. |
124 | */ | 124 | */ |
125 | int contained; | 125 | int contained; |
126 | 126 | ||
@@ -181,6 +181,11 @@ struct GNUNET_REGEX_State | |||
181 | struct GNUNET_REGEX_Transition *transitions_tail; | 181 | struct GNUNET_REGEX_Transition *transitions_tail; |
182 | 182 | ||
183 | /** | 183 | /** |
184 | * Number of incoming transitions. Used for compressing DFA paths. | ||
185 | */ | ||
186 | unsigned int incoming_transition_count; | ||
187 | |||
188 | /** | ||
184 | * Set of states on which this state is based on. Used when creating a DFA out | 189 | * Set of states on which this state is based on. Used when creating a DFA out |
185 | * of several NFA states. | 190 | * of several NFA states. |
186 | */ | 191 | */ |